Index: bindings/go/llvm/ir.go
===================================================================
--- bindings/go/llvm/ir.go
+++ bindings/go/llvm/ir.go
@@ -611,6 +611,12 @@
 }
 
 // Operations on array, pointer, and vector types (sequence types)
+func (t Type) Subtypes() (ret []Type) {
+	ret = make([]Type, C.LLVMGetNumContainedTypes(t.C))
+	C.LLVMGetSubtypes(t.C, llvmTypeRefPtr(&ret[0]))
+	return
+}
+
 func ArrayType(elementType Type, elementCount int) (t Type) {
 	t.C = C.LLVMArrayType(elementType.C, C.unsigned(elementCount))
 	return
Index: bindings/go/llvm/ir_test.go
===================================================================
--- bindings/go/llvm/ir_test.go
+++ bindings/go/llvm/ir_test.go
@@ -134,3 +134,29 @@
 		t.Errorf("Got metadata %v as scope, though wanted %v", loc.Scope.C, scope.C)
 	}
 }
+
+func TestSubtypes(t *testing.T) {
+	cont := NewContext()
+	defer cont.Dispose()
+
+	int_pointer := PointerType(cont.Int32Type(), 0)
+	int_inner := int_pointer.Subtypes()
+	if len(int_inner) != 1 {
+		t.Errorf("Got size %d, though wanted 1", len(int_inner))
+	}
+	if int_inner[0] != cont.Int32Type() {
+		t.Errorf("Expected int32 type")
+	}
+
+	st_pointer := cont.StructType([]Type{cont.Int32Type(), cont.Int8Type()}, false)
+	st_inner := st_pointer.Subtypes()
+	if len(st_inner) != 2 {
+		t.Errorf("Got size %d, though wanted 2", len(st_inner))
+	}
+	if st_inner[0] != cont.Int32Type() {
+		t.Errorf("Expected first struct field to be int32")
+	}
+	if st_inner[1] != cont.Int8Type() {
+		t.Errorf("Expected second struct field to be int8")
+	}
+}
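The Go Subtypes wrapper above is a thin veneer over two llvm-c entry points, LLVMGetNumContainedTypes and LLVMGetSubtypes. A minimal C++ sketch of the same call sequence (the helper name is ours; unlike the Go code it also guards the zero-subtype case, where &ret[0] would be out of bounds):

  #include "llvm-c/Core.h"
  #include <vector>

  // Fetch the contained types of Ty: the pointee for pointers, the element
  // type for arrays/vectors, or the field types for structs.
  std::vector<LLVMTypeRef> subtypes(LLVMTypeRef Ty) {
    std::vector<LLVMTypeRef> Ret(LLVMGetNumContainedTypes(Ty));
    if (!Ret.empty())
      LLVMGetSubtypes(Ty, Ret.data());
    return Ret;
  }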
Index: docs/ReleaseNotes.rst
===================================================================
--- docs/ReleaseNotes.rst
+++ docs/ReleaseNotes.rst
@@ -43,6 +43,13 @@
 * LLVM's ``WeakVH`` has been renamed to ``WeakTrackingVH`` and a new ``WeakVH``
   has been introduced. The new ``WeakVH`` nulls itself out on deletion, but does
   not track values across RAUW.
+
+* A new library named ``BinaryFormat`` has been created to hold a collection
+  of code that previously lived in ``Support``. This includes the
+  ``file_magic`` structure and ``identify_magic`` functions, as well as all the
+  structure and type definitions for DWARF, ELF, COFF, WASM, and MachO file
+  formats.
+
 * ... next change ...
Index: include/llvm/Analysis/BranchProbabilityInfo.h
===================================================================
--- include/llvm/Analysis/BranchProbabilityInfo.h
+++ include/llvm/Analysis/BranchProbabilityInfo.h
@@ -26,6 +26,7 @@
 namespace llvm {
 class LoopInfo;
+class TargetLibraryInfo;
 class raw_ostream;
 
 /// \brief Analysis providing branch probability information.
@@ -43,8 +44,9 @@
 class BranchProbabilityInfo {
 public:
   BranchProbabilityInfo() {}
-  BranchProbabilityInfo(const Function &F, const LoopInfo &LI) {
-    calculate(F, LI);
+  BranchProbabilityInfo(const Function &F, const LoopInfo &LI,
+                        const TargetLibraryInfo *TLI = nullptr) {
+    calculate(F, LI, TLI);
   }
 
   BranchProbabilityInfo(BranchProbabilityInfo &&Arg)
@@ -116,7 +118,8 @@
     return IsLikely ? LikelyProb : LikelyProb.getCompl();
   }
 
-  void calculate(const Function &F, const LoopInfo &LI);
+  void calculate(const Function &F, const LoopInfo &LI,
+                 const TargetLibraryInfo *TLI = nullptr);
 
   /// Forget analysis results for the given basic block.
   void eraseBlock(const BasicBlock *BB);
@@ -171,7 +174,7 @@
   bool calcColdCallHeuristics(const BasicBlock *BB);
   bool calcPointerHeuristics(const BasicBlock *BB);
   bool calcLoopBranchHeuristics(const BasicBlock *BB, const LoopInfo &LI);
-  bool calcZeroHeuristics(const BasicBlock *BB);
+  bool calcZeroHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI);
   bool calcFloatingPointHeuristics(const BasicBlock *BB);
   bool calcInvokeHeuristics(const BasicBlock *BB);
 };
Index: include/llvm/Analysis/LazyBranchProbabilityInfo.h
===================================================================
--- include/llvm/Analysis/LazyBranchProbabilityInfo.h
+++ include/llvm/Analysis/LazyBranchProbabilityInfo.h
@@ -24,6 +24,7 @@
 class AnalysisUsage;
 class Function;
 class LoopInfo;
+class TargetLibraryInfo;
 
 /// \brief This is an alternative analysis pass to
 /// BranchProbabilityInfoWrapperPass. The difference is that with this pass the
@@ -55,14 +56,15 @@
   /// analysis without paying for the overhead if BPI doesn't end up being used.
   class LazyBranchProbabilityInfo {
   public:
-    LazyBranchProbabilityInfo(const Function *F, const LoopInfo *LI)
-        : Calculated(false), F(F), LI(LI) {}
+    LazyBranchProbabilityInfo(const Function *F, const LoopInfo *LI,
+                              const TargetLibraryInfo *TLI)
+        : Calculated(false), F(F), LI(LI), TLI(TLI) {}
 
     /// Retrieve the BPI with the branch probabilities computed.
     BranchProbabilityInfo &getCalculated() {
       if (!Calculated) {
         assert(F && LI && "call setAnalysis");
-        BPI.calculate(*F, *LI);
+        BPI.calculate(*F, *LI, TLI);
         Calculated = true;
       }
       return BPI;
@@ -77,6 +79,7 @@
     bool Calculated;
     const Function *F;
     const LoopInfo *LI;
+    const TargetLibraryInfo *TLI;
   };
 
   std::unique_ptr<LazyBranchProbabilityInfo> LBPI;
Index: include/llvm/Analysis/TargetLibraryInfo.h
===================================================================
--- include/llvm/Analysis/TargetLibraryInfo.h
+++ include/llvm/Analysis/TargetLibraryInfo.h
@@ -13,6 +13,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
@@ -239,6 +240,13 @@
     return Impl->getLibFunc(FDecl, F);
   }
 
+  /// If a callsite does not have the 'nobuiltin' attribute, return whether the
+  /// called function is a known library function and set F to that function.
+  bool getLibFunc(ImmutableCallSite CS, LibFunc &F) const {
+    return !CS.isNoBuiltin() && CS.getCalledFunction() &&
+           getLibFunc(*(CS.getCalledFunction()), F);
+  }
+
   /// Tests whether a library function is available.
   bool has(LibFunc F) const {
     return Impl->getState(F) != TargetLibraryInfoImpl::Unavailable;
Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -1092,10 +1092,10 @@
   ///            \----------------v-------------/  \----------v------------/
   ///                    n/2 elements               n/2 elements
   /// %red1 = op <n x t> %val, <n x t> val1
-  /// After this operation we have a vector %red1 with only maningfull the
-  /// first n/2 elements, the second n/2 elements are undefined and can be
+  /// After this operation we have a vector %red1 where only the first n/2
+  /// elements are meaningful, the second n/2 elements are undefined and can be
   /// dropped. All other operations are actually working with the vector of
-  /// length n/2, not n. though the real vector length is still n.
+  /// length n/2, not n, though the real vector length is still n.
   /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
   /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
   ///            \----------------v-------------/  \----------v------------/
Index: include/llvm/DebugInfo/PDB/PDBSymbol.h
===================================================================
--- include/llvm/DebugInfo/PDB/PDBSymbol.h
+++ include/llvm/DebugInfo/PDB/PDBSymbol.h
@@ -89,6 +89,8 @@
   template <typename T>
   std::unique_ptr<T> findOneChild() const {
     auto Enumerator(findAllChildren<T>());
+    if (!Enumerator)
+      return nullptr;
     return Enumerator->getNext();
   }
 
@@ -97,6 +99,8 @@
   template <typename T>
   std::unique_ptr<IPDBEnumChildren<T>> findAllChildren() const {
     auto BaseIter = RawSymbol->findChildren(T::Tag);
+    if (!BaseIter)
+      return nullptr;
     return llvm::make_unique<ConcreteSymbolEnumerator<T>>(std::move(BaseIter));
   }
   std::unique_ptr<IPDBEnumSymbols> findAllChildren(PDB_SymType Type) const;
Index: lib/Analysis/BranchProbabilityInfo.cpp
===================================================================
--- lib/Analysis/BranchProbabilityInfo.cpp
+++ lib/Analysis/BranchProbabilityInfo.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
@@ -30,6 +31,7 @@
 INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob",
                       "Branch Probability Analysis", false, true)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob",
                     "Branch Probability Analysis", false, true)
 
@@ -457,7 +459,8 @@
   return true;
 }
 
-bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB) {
+bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
+                                               const TargetLibraryInfo *TLI) {
   const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
   if (!BI || !BI->isConditional())
     return false;
@@ -480,8 +483,37 @@
     if (AndRHS->getUniqueInteger().isPowerOf2())
       return false;
 
+  // Check if the LHS is the return value of a library function
+  LibFunc Func = NumLibFuncs;
+  if (TLI)
+    if (CallInst *Call = dyn_cast<CallInst>(CI->getOperand(0)))
+      if (Function *CalledFn = Call->getCalledFunction())
+        TLI->getLibFunc(*CalledFn, Func);
+
   bool isProb;
-  if (CV->isZero()) {
+  if (Func == LibFunc_strcasecmp ||
+      Func == LibFunc_strcmp ||
+      Func == LibFunc_strncasecmp ||
+      Func == LibFunc_strncmp ||
+      Func == LibFunc_memcmp) {
+    // strcmp and similar functions return zero, negative, or positive if the
+    // first string is equal to, less than, or greater than the second. We
+    // consider it likely that the strings are not equal, so a comparison with
+    // zero is probably false; a comparison with any other number is probably
+    // false as well, since what exactly is returned for nonzero values is not
+    // specified. We know nothing about comparisons other than equality and
+    // inequality.
+    switch (CI->getPredicate()) {
+    case CmpInst::ICMP_EQ:
+      isProb = false;
+      break;
+    case CmpInst::ICMP_NE:
+      isProb = true;
+      break;
+    default:
+      return false;
+    }
+  } else if (CV->isZero()) {
     switch (CI->getPredicate()) {
     case CmpInst::ICMP_EQ:
       // X == 0   ->  Unlikely
@@ -707,7 +739,8 @@
   }
 }
 
-void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) {
+void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
+                                      const TargetLibraryInfo *TLI) {
   DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
                << " ----\n\n");
   LastF = &F; // Store the last function we ran on for printing.
@@ -733,7 +766,7 @@
       continue;
     if (calcPointerHeuristics(BB))
       continue;
-    if (calcZeroHeuristics(BB))
+    if (calcZeroHeuristics(BB, TLI))
      continue;
    if (calcFloatingPointHeuristics(BB))
      continue;
@@ -747,12 +780,14 @@
 void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
     AnalysisUsage &AU) const {
   AU.addRequired<LoopInfoWrapperPass>();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
   AU.setPreservesAll();
 }
 
 bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) {
   const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  BPI.calculate(F, LI);
+  const TargetLibraryInfo &TLI =
+      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  BPI.calculate(F, LI, &TLI);
   return false;
 }
 
@@ -767,7 +802,7 @@
 BranchProbabilityInfo
 BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
   BranchProbabilityInfo BPI;
-  BPI.calculate(F, AM.getResult<LoopAnalysis>(F));
+  BPI.calculate(F, AM.getResult<LoopAnalysis>(F),
+                &AM.getResult<TargetLibraryAnalysis>(F));
   return BPI;
 }
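The effect of the new calcZeroHeuristics branch is easiest to see in isolation. A reduced, hypothetical classifier with the same EQ-unlikely/NE-likely outcome (only strcmp/memcmp shown here; the patch also covers strncmp and the case-insensitive variants):

  #include "llvm/Analysis/TargetLibraryInfo.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Returns true when CI is the "unlikely" shape: a library string/memory
  // comparison tested for equality with a constant. Anything else gets no bias.
  static bool isLibCmpEqUnlikely(const ICmpInst *CI,
                                 const TargetLibraryInfo &TLI) {
    LibFunc Func;
    auto *Call = dyn_cast<CallInst>(CI->getOperand(0));
    if (!Call || !Call->getCalledFunction() ||
        !TLI.getLibFunc(*Call->getCalledFunction(), Func))
      return false; // not a recognized library call
    if (Func != LibFunc_strcmp && Func != LibFunc_memcmp)
      return false;
    return CI->getPredicate() == CmpInst::ICMP_EQ; // equal strings: unlikely
  }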
Index: lib/Analysis/LazyBranchProbabilityInfo.cpp
===================================================================
--- lib/Analysis/LazyBranchProbabilityInfo.cpp
+++ lib/Analysis/LazyBranchProbabilityInfo.cpp
@@ -16,6 +16,7 @@
 
 #include "llvm/Analysis/LazyBranchProbabilityInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 
 using namespace llvm;
 
@@ -24,6 +25,7 @@
 INITIALIZE_PASS_BEGIN(LazyBranchProbabilityInfoPass, DEBUG_TYPE,
                       "Lazy Branch Probability Analysis", true, true)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(LazyBranchProbabilityInfoPass, DEBUG_TYPE,
                     "Lazy Branch Probability Analysis", true, true)
 
@@ -41,6 +43,7 @@
 
 void LazyBranchProbabilityInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<LoopInfoWrapperPass>();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
   AU.setPreservesAll();
 }
 
@@ -48,16 +51,19 @@
 
 bool LazyBranchProbabilityInfoPass::runOnFunction(Function &F) {
   LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  LBPI = llvm::make_unique<LazyBranchProbabilityInfo>(&F, &LI);
+  TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  LBPI = llvm::make_unique<LazyBranchProbabilityInfo>(&F, &LI, &TLI);
   return false;
 }
 
 void LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AnalysisUsage &AU) {
   AU.addRequired<LazyBranchProbabilityInfoPass>();
   AU.addRequired<LoopInfoWrapperPass>();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
 }
 
 void llvm::initializeLazyBPIPassPass(PassRegistry &Registry) {
   INITIALIZE_PASS_DEPENDENCY(LazyBranchProbabilityInfoPass);
   INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
+  INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass);
 }
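The Lazy wrapper above now threads TLI through, but keeps the same deferred-computation shape: nothing is computed until getCalculated() runs. The pattern in isolation, as a generic sketch (names are ours, not LLVM's):

  #include <functional>

  // Memoized-computation pattern behind LazyBranchProbabilityInfo: store the
  // inputs as a closure, run the expensive calculation on first use, cache it.
  template <typename T> class LazyResult {
    std::function<T()> Compute;
    T Value{};
    bool Calculated = false;

  public:
    explicit LazyResult(std::function<T()> C) : Compute(std::move(C)) {}
    T &get() {
      if (!Calculated) {
        Value = Compute();
        Calculated = true;
      }
      return Value;
    }
  };

A caller would construct this with a closure capturing F, LI, and TLI, mirroring the pass's setAnalysis/getCalculated split.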
Index: lib/Analysis/LazyValueInfo.cpp
===================================================================
--- lib/Analysis/LazyValueInfo.cpp
+++ lib/Analysis/LazyValueInfo.cpp
@@ -302,7 +302,7 @@
 /// contradictory. If this happens, we return some valid lattice value so as
 /// not to confuse the rest of LVI. Ideally, we'd always return Undefined, but
 /// we do not make this guarantee. TODO: This would be a useful enhancement.
-static LVILatticeVal intersect(LVILatticeVal A, LVILatticeVal B) {
+static LVILatticeVal intersect(const LVILatticeVal &A, const LVILatticeVal &B) {
   // Undefined is the strongest state. It means the value is known to be along
   // an unreachable path.
   if (A.isUndefined())
Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -235,12 +235,12 @@
     void eliminateMostlyEmptyBlock(BasicBlock *BB);
     bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
                                        bool isPreheader);
-    bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT);
-    bool optimizeInst(Instruction *I, bool& ModifiedDT);
+    bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
+    bool optimizeInst(Instruction *I, bool &ModifiedDT);
     bool optimizeMemoryInst(Instruction *I, Value *Addr,
                             Type *AccessTy, unsigned AS);
     bool optimizeInlineAsmInst(CallInst *CS);
-    bool optimizeCallInst(CallInst *CI, bool& ModifiedDT);
+    bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
     bool optimizeExt(Instruction *&I);
     bool optimizeExtUses(Instruction *I);
     bool optimizeLoadExt(LoadInst *I);
@@ -1675,6 +1675,7 @@
   void emitLoadCompareByteBlock(unsigned Index, int GEPIndex);
   void emitMemCmpResultBlock(bool IsLittleEndian);
   Value *getMemCmpExpansionZeroCase(unsigned Size, bool IsLittleEndian);
+  Value *getMemCmpEqZeroOneBlock(unsigned Size);
   unsigned getLoadSize(unsigned Size);
   unsigned getNumLoads(unsigned Size);
 
@@ -1699,31 +1700,35 @@
                                unsigned MaxLoadSize, unsigned LoadsPerBlock)
     : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(LoadsPerBlock) {
-  IRBuilder<> Builder(CI->getContext());
-
-  BasicBlock *StartBlock = CI->getParent();
-  EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
-  setupEndBlockPHINodes();
+  // A memcmp that is only used in a zero-equality comparison and needs just
+  // one block of loads and compares does not need to set up any extra blocks.
+  // This case could be handled in the DAG, but since we have all of the
+  // machinery to flexibly expand any memcmp here, we choose to handle this
+  // case too to avoid fragmented lowering.
   IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
-
-  // Calculate how many load compare blocks are required for an expansion of
-  // given Size.
   NumBlocks = calculateNumBlocks(Size);
-  createResultBlock();
+  if (!IsUsedForZeroCmp || NumBlocks != 1) {
+    BasicBlock *StartBlock = CI->getParent();
+    EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
+    setupEndBlockPHINodes();
+    createResultBlock();
 
-  // If return value of memcmp is not used in a zero equality, we need to
-  // calculate which source was larger. The calculation requires the
-  // two loaded source values of each load compare block.
-  // These will be saved in the phi nodes created by setupResultBlockPHINodes.
-  if (!IsUsedForZeroCmp)
-    setupResultBlockPHINodes();
+    // If return value of memcmp is not used in a zero equality, we need to
+    // calculate which source was larger. The calculation requires the
+    // two loaded source values of each load compare block.
+    // These will be saved in the phi nodes created by setupResultBlockPHINodes.
+    if (!IsUsedForZeroCmp)
+      setupResultBlockPHINodes();
 
-  // Create the number of required load compare basic blocks.
-  createLoadCmpBlocks();
+    // Create the number of required load compare basic blocks.
+    createLoadCmpBlocks();
+
+    // Update the terminator added by splitBasicBlock to branch to the first
+    // LoadCmpBlock.
+    StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
+  }
 
-  // Update the terminator added by splitBasicBlock to branch to the first
-  // LoadCmpBlock.
+  IRBuilder<> Builder(CI->getContext());
   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-  StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
 }
 
 void MemCmpExpansion::createLoadCmpBlocks() {
@@ -1810,7 +1815,12 @@
   unsigned NumLoadsRemaining = getNumLoads(RemainingBytes);
   unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock);
 
-  Builder.SetInsertPoint(LoadCmpBlocks[Index]);
+  // For a single-block expansion, start inserting before the memcmp call.
+  if (LoadCmpBlocks.empty())
+    Builder.SetInsertPoint(CI);
+  else
+    Builder.SetInsertPoint(LoadCmpBlocks[Index]);
+
   Value *Cmp = nullptr;
   for (unsigned i = 0; i < NumLoads; ++i) {
     unsigned LoadSize = getLoadSize(RemainingBytes);
@@ -2071,11 +2081,22 @@
   return PhiRes;
 }
 
+/// A memcmp expansion that compares equality with 0 and only has one block of
+/// load and compare can bypass the compare, branch, and phi IR that is required
+/// in the general case.
+Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) {
+  unsigned NumBytesProcessed = 0;
+  IRBuilder<> Builder(CI->getContext());
+  Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed, Builder);
+  return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
+}
+
 // This function expands the memcmp call into an inline expansion and returns
 // the memcmp result.
 Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size, bool IsLittleEndian) {
   if (IsUsedForZeroCmp)
-    return getMemCmpExpansionZeroCase(Size, IsLittleEndian);
+    return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) :
+                            getMemCmpExpansionZeroCase(Size, IsLittleEndian);
 
   // This loop calls emitLoadCompareBlock for comparing Size bytes of the two
   // memcmp sources. It starts with loading using the maximum load size set by
@@ -2232,7 +2253,7 @@
   return true;
 }
 
-bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
+bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
   BasicBlock *BB = CI->getParent();
 
   // Lower inline assembly if we can.
@@ -2385,12 +2406,10 @@
   }
 
   LibFunc Func;
-  if (TLInfo->getLibFunc(*CI->getCalledFunction(), Func) &&
-      Func == LibFunc_memcmp) {
-    if (expandMemCmp(CI, TTI, TLI, DL)) {
-      ModifiedDT = true;
-      return true;
-    }
+  if (TLInfo->getLibFunc(ImmutableCallSite(CI), Func) &&
+      Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TLI, DL)) {
+    ModifiedDT = true;
+    return true;
   }
   return false;
 }
@@ -6018,7 +6037,7 @@
   return true;
 }
 
-bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
+bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
   // Bail out if we inserted the instruction to prevent optimizations from
   // stepping on each other's toes.
   if (InsertedInsts.count(I))
@@ -6173,7 +6192,7 @@
 // In this pass we look for GEP and cast instructions that are used
 // across basic blocks and rewrite them to improve basic-block-at-a-time
 // selection.
-bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
+bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
   SunkAddrs.clear();
   bool MadeChange = false;
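For a feel of what the single-block zero-equality path emits: with a 4-byte size and one load pair, the whole call reduces to two loads, one compare, and a zext in the caller's block, with no new control flow. A hand-written IRBuilder sketch of that shape (assumed helper, not the pass's actual code):

  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  // Roughly what the one-block expansion lowers memcmp(p, q, 4) to. The result
  // is 0 iff the two 4-byte blocks are equal, so a `== 0` user folds to ICmpEQ.
  static Value *expandMemCmpEq4(IRBuilder<> &B, Value *P, Value *Q) {
    Type *I32 = B.getInt32Ty();
    Value *L = B.CreateLoad(I32, B.CreateBitCast(P, I32->getPointerTo()));
    Value *R = B.CreateLoad(I32, B.CreateBitCast(Q, I32->getPointerTo()));
    Value *NE = B.CreateICmpNE(L, R); // single compare, no branches
    return B.CreateZExt(NE, I32);     // nonzero iff the blocks differ
  }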
Index: lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
===================================================================
--- lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
+++ lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
@@ -372,8 +372,11 @@
   enum SymTagEnum EnumVal = static_cast<enum SymTagEnum>(Type);
 
   CComPtr<IDiaEnumSymbols> DiaEnumerator;
-  if (S_OK != Symbol->findChildrenEx(EnumVal, nullptr, nsNone, &DiaEnumerator))
-    return nullptr;
+  if (S_OK !=
+      Symbol->findChildrenEx(EnumVal, nullptr, nsNone, &DiaEnumerator)) {
+    if (S_OK != Symbol->findChildren(EnumVal, nullptr, nsNone, &DiaEnumerator))
+      return nullptr;
+  }
 
   return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
 }
Index: lib/MC/MCExpr.cpp
===================================================================
--- lib/MC/MCExpr.cpp
+++ lib/MC/MCExpr.cpp
@@ -655,8 +655,12 @@
       // the OS X assembler will completely drop the 4. We should probably
       // include it in the relocation or produce an error if that is not
       // possible.
+      // Allow constant expressions.
       if (!A && !B)
         return true;
+      // Allow aliases with zero offset.
+      if (Res.getConstant() == 0 && (!A || !B))
+        return true;
     }
   }
Index: lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
@@ -0,0 +1,353 @@
+//===-- AMDGPURegAsmNames.inc - Register asm names ----------*- C++ -*-----===//
+
+#ifdef AMDGPU_REG_ASM_NAMES
+
+static const char *const VGPR32RegNames[] = {
+  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
+  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
+  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
+  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
+  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
+  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
+  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
+  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
+  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
+  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
+  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
+  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
+  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
+  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
+  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
+  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
+  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
+  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
+  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
+  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
+  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
+  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
+  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
+  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
+  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
+  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
+  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
+  "v252", "v253", "v254", "v255"
+};
"v246", "v247", "v248", "v249", "v250", "v251", + "v252", "v253", "v254", "v255" +}; + +static const char *const SGPR32RegNames[] = { + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", + "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", + "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29", + "s30", "s31", "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", + "s40", "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", + "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", "s59", + "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", "s68", "s69", + "s70", "s71", "s72", "s73", "s74", "s75", "s76", "s77", "s78", "s79", + "s80", "s81", "s82", "s83", "s84", "s85", "s86", "s87", "s88", "s89", + "s90", "s91", "s92", "s93", "s94", "s95", "s96", "s97", "s98", "s99", + "s100", "s101", "s102", "s103" +}; + +static const char *const VGPR64RegNames[] = { + "v[0:1]", "v[1:2]", "v[2:3]", "v[3:4]", "v[4:5]", + "v[5:6]", "v[6:7]", "v[7:8]", "v[8:9]", "v[9:10]", + "v[10:11]", "v[11:12]", "v[12:13]", "v[13:14]", "v[14:15]", + "v[15:16]", "v[16:17]", "v[17:18]", "v[18:19]", "v[19:20]", + "v[20:21]", "v[21:22]", "v[22:23]", "v[23:24]", "v[24:25]", + "v[25:26]", "v[26:27]", "v[27:28]", "v[28:29]", "v[29:30]", + "v[30:31]", "v[31:32]", "v[32:33]", "v[33:34]", "v[34:35]", + "v[35:36]", "v[36:37]", "v[37:38]", "v[38:39]", "v[39:40]", + "v[40:41]", "v[41:42]", "v[42:43]", "v[43:44]", "v[44:45]", + "v[45:46]", "v[46:47]", "v[47:48]", "v[48:49]", "v[49:50]", + "v[50:51]", "v[51:52]", "v[52:53]", "v[53:54]", "v[54:55]", + "v[55:56]", "v[56:57]", "v[57:58]", "v[58:59]", "v[59:60]", + "v[60:61]", "v[61:62]", "v[62:63]", "v[63:64]", "v[64:65]", + "v[65:66]", "v[66:67]", "v[67:68]", "v[68:69]", "v[69:70]", + "v[70:71]", "v[71:72]", "v[72:73]", "v[73:74]", "v[74:75]", + "v[75:76]", "v[76:77]", "v[77:78]", "v[78:79]", "v[79:80]", + "v[80:81]", "v[81:82]", "v[82:83]", "v[83:84]", "v[84:85]", + "v[85:86]", "v[86:87]", "v[87:88]", "v[88:89]", "v[89:90]", + "v[90:91]", "v[91:92]", "v[92:93]", "v[93:94]", "v[94:95]", + "v[95:96]", "v[96:97]", "v[97:98]", "v[98:99]", "v[99:100]", + "v[100:101]", "v[101:102]", "v[102:103]", "v[103:104]", "v[104:105]", + "v[105:106]", "v[106:107]", "v[107:108]", "v[108:109]", "v[109:110]", + "v[110:111]", "v[111:112]", "v[112:113]", "v[113:114]", "v[114:115]", + "v[115:116]", "v[116:117]", "v[117:118]", "v[118:119]", "v[119:120]", + "v[120:121]", "v[121:122]", "v[122:123]", "v[123:124]", "v[124:125]", + "v[125:126]", "v[126:127]", "v[127:128]", "v[128:129]", "v[129:130]", + "v[130:131]", "v[131:132]", "v[132:133]", "v[133:134]", "v[134:135]", + "v[135:136]", "v[136:137]", "v[137:138]", "v[138:139]", "v[139:140]", + "v[140:141]", "v[141:142]", "v[142:143]", "v[143:144]", "v[144:145]", + "v[145:146]", "v[146:147]", "v[147:148]", "v[148:149]", "v[149:150]", + "v[150:151]", "v[151:152]", "v[152:153]", "v[153:154]", "v[154:155]", + "v[155:156]", "v[156:157]", "v[157:158]", "v[158:159]", "v[159:160]", + "v[160:161]", "v[161:162]", "v[162:163]", "v[163:164]", "v[164:165]", + "v[165:166]", "v[166:167]", "v[167:168]", "v[168:169]", "v[169:170]", + "v[170:171]", "v[171:172]", "v[172:173]", "v[173:174]", "v[174:175]", + "v[175:176]", "v[176:177]", "v[177:178]", "v[178:179]", "v[179:180]", + "v[180:181]", "v[181:182]", "v[182:183]", "v[183:184]", "v[184:185]", + "v[185:186]", "v[186:187]", "v[187:188]", "v[188:189]", "v[189:190]", + "v[190:191]", "v[191:192]", "v[192:193]", "v[193:194]", "v[194:195]", + "v[195:196]", "v[196:197]", 
"v[197:198]", "v[198:199]", "v[199:200]", + "v[200:201]", "v[201:202]", "v[202:203]", "v[203:204]", "v[204:205]", + "v[205:206]", "v[206:207]", "v[207:208]", "v[208:209]", "v[209:210]", + "v[210:211]", "v[211:212]", "v[212:213]", "v[213:214]", "v[214:215]", + "v[215:216]", "v[216:217]", "v[217:218]", "v[218:219]", "v[219:220]", + "v[220:221]", "v[221:222]", "v[222:223]", "v[223:224]", "v[224:225]", + "v[225:226]", "v[226:227]", "v[227:228]", "v[228:229]", "v[229:230]", + "v[230:231]", "v[231:232]", "v[232:233]", "v[233:234]", "v[234:235]", + "v[235:236]", "v[236:237]", "v[237:238]", "v[238:239]", "v[239:240]", + "v[240:241]", "v[241:242]", "v[242:243]", "v[243:244]", "v[244:245]", + "v[245:246]", "v[246:247]", "v[247:248]", "v[248:249]", "v[249:250]", + "v[250:251]", "v[251:252]", "v[252:253]", "v[253:254]", "v[254:255]" +}; + +static const char *const VGPR96RegNames[] = { + "v[0:2]", "v[1:3]", "v[2:4]", "v[3:5]", "v[4:6]", + "v[5:7]", "v[6:8]", "v[7:9]", "v[8:10]", "v[9:11]", + "v[10:12]", "v[11:13]", "v[12:14]", "v[13:15]", "v[14:16]", + "v[15:17]", "v[16:18]", "v[17:19]", "v[18:20]", "v[19:21]", + "v[20:22]", "v[21:23]", "v[22:24]", "v[23:25]", "v[24:26]", + "v[25:27]", "v[26:28]", "v[27:29]", "v[28:30]", "v[29:31]", + "v[30:32]", "v[31:33]", "v[32:34]", "v[33:35]", "v[34:36]", + "v[35:37]", "v[36:38]", "v[37:39]", "v[38:40]", "v[39:41]", + "v[40:42]", "v[41:43]", "v[42:44]", "v[43:45]", "v[44:46]", + "v[45:47]", "v[46:48]", "v[47:49]", "v[48:50]", "v[49:51]", + "v[50:52]", "v[51:53]", "v[52:54]", "v[53:55]", "v[54:56]", + "v[55:57]", "v[56:58]", "v[57:59]", "v[58:60]", "v[59:61]", + "v[60:62]", "v[61:63]", "v[62:64]", "v[63:65]", "v[64:66]", + "v[65:67]", "v[66:68]", "v[67:69]", "v[68:70]", "v[69:71]", + "v[70:72]", "v[71:73]", "v[72:74]", "v[73:75]", "v[74:76]", + "v[75:77]", "v[76:78]", "v[77:79]", "v[78:80]", "v[79:81]", + "v[80:82]", "v[81:83]", "v[82:84]", "v[83:85]", "v[84:86]", + "v[85:87]", "v[86:88]", "v[87:89]", "v[88:90]", "v[89:91]", + "v[90:92]", "v[91:93]", "v[92:94]", "v[93:95]", "v[94:96]", + "v[95:97]", "v[96:98]", "v[97:99]", "v[98:100]", "v[99:101]", + "v[100:102]", "v[101:103]", "v[102:104]", "v[103:105]", "v[104:106]", + "v[105:107]", "v[106:108]", "v[107:109]", "v[108:110]", "v[109:111]", + "v[110:112]", "v[111:113]", "v[112:114]", "v[113:115]", "v[114:116]", + "v[115:117]", "v[116:118]", "v[117:119]", "v[118:120]", "v[119:121]", + "v[120:122]", "v[121:123]", "v[122:124]", "v[123:125]", "v[124:126]", + "v[125:127]", "v[126:128]", "v[127:129]", "v[128:130]", "v[129:131]", + "v[130:132]", "v[131:133]", "v[132:134]", "v[133:135]", "v[134:136]", + "v[135:137]", "v[136:138]", "v[137:139]", "v[138:140]", "v[139:141]", + "v[140:142]", "v[141:143]", "v[142:144]", "v[143:145]", "v[144:146]", + "v[145:147]", "v[146:148]", "v[147:149]", "v[148:150]", "v[149:151]", + "v[150:152]", "v[151:153]", "v[152:154]", "v[153:155]", "v[154:156]", + "v[155:157]", "v[156:158]", "v[157:159]", "v[158:160]", "v[159:161]", + "v[160:162]", "v[161:163]", "v[162:164]", "v[163:165]", "v[164:166]", + "v[165:167]", "v[166:168]", "v[167:169]", "v[168:170]", "v[169:171]", + "v[170:172]", "v[171:173]", "v[172:174]", "v[173:175]", "v[174:176]", + "v[175:177]", "v[176:178]", "v[177:179]", "v[178:180]", "v[179:181]", + "v[180:182]", "v[181:183]", "v[182:184]", "v[183:185]", "v[184:186]", + "v[185:187]", "v[186:188]", "v[187:189]", "v[188:190]", "v[189:191]", + "v[190:192]", "v[191:193]", "v[192:194]", "v[193:195]", "v[194:196]", + "v[195:197]", "v[196:198]", "v[197:199]", "v[198:200]", "v[199:201]", + 
"v[200:202]", "v[201:203]", "v[202:204]", "v[203:205]", "v[204:206]", + "v[205:207]", "v[206:208]", "v[207:209]", "v[208:210]", "v[209:211]", + "v[210:212]", "v[211:213]", "v[212:214]", "v[213:215]", "v[214:216]", + "v[215:217]", "v[216:218]", "v[217:219]", "v[218:220]", "v[219:221]", + "v[220:222]", "v[221:223]", "v[222:224]", "v[223:225]", "v[224:226]", + "v[225:227]", "v[226:228]", "v[227:229]", "v[228:230]", "v[229:231]", + "v[230:232]", "v[231:233]", "v[232:234]", "v[233:235]", "v[234:236]", + "v[235:237]", "v[236:238]", "v[237:239]", "v[238:240]", "v[239:241]", + "v[240:242]", "v[241:243]", "v[242:244]", "v[243:245]", "v[244:246]", + "v[245:247]", "v[246:248]", "v[247:249]", "v[248:250]", "v[249:251]", + "v[250:252]", "v[251:253]", "v[252:254]", "v[253:255]" +}; + +static const char *const VGPR128RegNames[] = { + "v[0:3]", "v[1:4]", "v[2:5]", "v[3:6]", "v[4:7]", + "v[5:8]", "v[6:9]", "v[7:10]", "v[8:11]", "v[9:12]", + "v[10:13]", "v[11:14]", "v[12:15]", "v[13:16]", "v[14:17]", + "v[15:18]", "v[16:19]", "v[17:20]", "v[18:21]", "v[19:22]", + "v[20:23]", "v[21:24]", "v[22:25]", "v[23:26]", "v[24:27]", + "v[25:28]", "v[26:29]", "v[27:30]", "v[28:31]", "v[29:32]", + "v[30:33]", "v[31:34]", "v[32:35]", "v[33:36]", "v[34:37]", + "v[35:38]", "v[36:39]", "v[37:40]", "v[38:41]", "v[39:42]", + "v[40:43]", "v[41:44]", "v[42:45]", "v[43:46]", "v[44:47]", + "v[45:48]", "v[46:49]", "v[47:50]", "v[48:51]", "v[49:52]", + "v[50:53]", "v[51:54]", "v[52:55]", "v[53:56]", "v[54:57]", + "v[55:58]", "v[56:59]", "v[57:60]", "v[58:61]", "v[59:62]", + "v[60:63]", "v[61:64]", "v[62:65]", "v[63:66]", "v[64:67]", + "v[65:68]", "v[66:69]", "v[67:70]", "v[68:71]", "v[69:72]", + "v[70:73]", "v[71:74]", "v[72:75]", "v[73:76]", "v[74:77]", + "v[75:78]", "v[76:79]", "v[77:80]", "v[78:81]", "v[79:82]", + "v[80:83]", "v[81:84]", "v[82:85]", "v[83:86]", "v[84:87]", + "v[85:88]", "v[86:89]", "v[87:90]", "v[88:91]", "v[89:92]", + "v[90:93]", "v[91:94]", "v[92:95]", "v[93:96]", "v[94:97]", + "v[95:98]", "v[96:99]", "v[97:100]", "v[98:101]", "v[99:102]", + "v[100:103]", "v[101:104]", "v[102:105]", "v[103:106]", "v[104:107]", + "v[105:108]", "v[106:109]", "v[107:110]", "v[108:111]", "v[109:112]", + "v[110:113]", "v[111:114]", "v[112:115]", "v[113:116]", "v[114:117]", + "v[115:118]", "v[116:119]", "v[117:120]", "v[118:121]", "v[119:122]", + "v[120:123]", "v[121:124]", "v[122:125]", "v[123:126]", "v[124:127]", + "v[125:128]", "v[126:129]", "v[127:130]", "v[128:131]", "v[129:132]", + "v[130:133]", "v[131:134]", "v[132:135]", "v[133:136]", "v[134:137]", + "v[135:138]", "v[136:139]", "v[137:140]", "v[138:141]", "v[139:142]", + "v[140:143]", "v[141:144]", "v[142:145]", "v[143:146]", "v[144:147]", + "v[145:148]", "v[146:149]", "v[147:150]", "v[148:151]", "v[149:152]", + "v[150:153]", "v[151:154]", "v[152:155]", "v[153:156]", "v[154:157]", + "v[155:158]", "v[156:159]", "v[157:160]", "v[158:161]", "v[159:162]", + "v[160:163]", "v[161:164]", "v[162:165]", "v[163:166]", "v[164:167]", + "v[165:168]", "v[166:169]", "v[167:170]", "v[168:171]", "v[169:172]", + "v[170:173]", "v[171:174]", "v[172:175]", "v[173:176]", "v[174:177]", + "v[175:178]", "v[176:179]", "v[177:180]", "v[178:181]", "v[179:182]", + "v[180:183]", "v[181:184]", "v[182:185]", "v[183:186]", "v[184:187]", + "v[185:188]", "v[186:189]", "v[187:190]", "v[188:191]", "v[189:192]", + "v[190:193]", "v[191:194]", "v[192:195]", "v[193:196]", "v[194:197]", + "v[195:198]", "v[196:199]", "v[197:200]", "v[198:201]", "v[199:202]", + "v[200:203]", "v[201:204]", "v[202:205]", "v[203:206]", 
"v[204:207]", + "v[205:208]", "v[206:209]", "v[207:210]", "v[208:211]", "v[209:212]", + "v[210:213]", "v[211:214]", "v[212:215]", "v[213:216]", "v[214:217]", + "v[215:218]", "v[216:219]", "v[217:220]", "v[218:221]", "v[219:222]", + "v[220:223]", "v[221:224]", "v[222:225]", "v[223:226]", "v[224:227]", + "v[225:228]", "v[226:229]", "v[227:230]", "v[228:231]", "v[229:232]", + "v[230:233]", "v[231:234]", "v[232:235]", "v[233:236]", "v[234:237]", + "v[235:238]", "v[236:239]", "v[237:240]", "v[238:241]", "v[239:242]", + "v[240:243]", "v[241:244]", "v[242:245]", "v[243:246]", "v[244:247]", + "v[245:248]", "v[246:249]", "v[247:250]", "v[248:251]", "v[249:252]", + "v[250:253]", "v[251:254]", "v[252:255]" +}; + +static const char *const VGPR256RegNames[] = { + "v[0:7]", "v[1:8]", "v[2:9]", "v[3:10]", "v[4:11]", + "v[5:12]", "v[6:13]", "v[7:14]", "v[8:15]", "v[9:16]", + "v[10:17]", "v[11:18]", "v[12:19]", "v[13:20]", "v[14:21]", + "v[15:22]", "v[16:23]", "v[17:24]", "v[18:25]", "v[19:26]", + "v[20:27]", "v[21:28]", "v[22:29]", "v[23:30]", "v[24:31]", + "v[25:32]", "v[26:33]", "v[27:34]", "v[28:35]", "v[29:36]", + "v[30:37]", "v[31:38]", "v[32:39]", "v[33:40]", "v[34:41]", + "v[35:42]", "v[36:43]", "v[37:44]", "v[38:45]", "v[39:46]", + "v[40:47]", "v[41:48]", "v[42:49]", "v[43:50]", "v[44:51]", + "v[45:52]", "v[46:53]", "v[47:54]", "v[48:55]", "v[49:56]", + "v[50:57]", "v[51:58]", "v[52:59]", "v[53:60]", "v[54:61]", + "v[55:62]", "v[56:63]", "v[57:64]", "v[58:65]", "v[59:66]", + "v[60:67]", "v[61:68]", "v[62:69]", "v[63:70]", "v[64:71]", + "v[65:72]", "v[66:73]", "v[67:74]", "v[68:75]", "v[69:76]", + "v[70:77]", "v[71:78]", "v[72:79]", "v[73:80]", "v[74:81]", + "v[75:82]", "v[76:83]", "v[77:84]", "v[78:85]", "v[79:86]", + "v[80:87]", "v[81:88]", "v[82:89]", "v[83:90]", "v[84:91]", + "v[85:92]", "v[86:93]", "v[87:94]", "v[88:95]", "v[89:96]", + "v[90:97]", "v[91:98]", "v[92:99]", "v[93:100]", "v[94:101]", + "v[95:102]", "v[96:103]", "v[97:104]", "v[98:105]", "v[99:106]", + "v[100:107]", "v[101:108]", "v[102:109]", "v[103:110]", "v[104:111]", + "v[105:112]", "v[106:113]", "v[107:114]", "v[108:115]", "v[109:116]", + "v[110:117]", "v[111:118]", "v[112:119]", "v[113:120]", "v[114:121]", + "v[115:122]", "v[116:123]", "v[117:124]", "v[118:125]", "v[119:126]", + "v[120:127]", "v[121:128]", "v[122:129]", "v[123:130]", "v[124:131]", + "v[125:132]", "v[126:133]", "v[127:134]", "v[128:135]", "v[129:136]", + "v[130:137]", "v[131:138]", "v[132:139]", "v[133:140]", "v[134:141]", + "v[135:142]", "v[136:143]", "v[137:144]", "v[138:145]", "v[139:146]", + "v[140:147]", "v[141:148]", "v[142:149]", "v[143:150]", "v[144:151]", + "v[145:152]", "v[146:153]", "v[147:154]", "v[148:155]", "v[149:156]", + "v[150:157]", "v[151:158]", "v[152:159]", "v[153:160]", "v[154:161]", + "v[155:162]", "v[156:163]", "v[157:164]", "v[158:165]", "v[159:166]", + "v[160:167]", "v[161:168]", "v[162:169]", "v[163:170]", "v[164:171]", + "v[165:172]", "v[166:173]", "v[167:174]", "v[168:175]", "v[169:176]", + "v[170:177]", "v[171:178]", "v[172:179]", "v[173:180]", "v[174:181]", + "v[175:182]", "v[176:183]", "v[177:184]", "v[178:185]", "v[179:186]", + "v[180:187]", "v[181:188]", "v[182:189]", "v[183:190]", "v[184:191]", + "v[185:192]", "v[186:193]", "v[187:194]", "v[188:195]", "v[189:196]", + "v[190:197]", "v[191:198]", "v[192:199]", "v[193:200]", "v[194:201]", + "v[195:202]", "v[196:203]", "v[197:204]", "v[198:205]", "v[199:206]", + "v[200:207]", "v[201:208]", "v[202:209]", "v[203:210]", "v[204:211]", + "v[205:212]", "v[206:213]", "v[207:214]", 
"v[208:215]", "v[209:216]", + "v[210:217]", "v[211:218]", "v[212:219]", "v[213:220]", "v[214:221]", + "v[215:222]", "v[216:223]", "v[217:224]", "v[218:225]", "v[219:226]", + "v[220:227]", "v[221:228]", "v[222:229]", "v[223:230]", "v[224:231]", + "v[225:232]", "v[226:233]", "v[227:234]", "v[228:235]", "v[229:236]", + "v[230:237]", "v[231:238]", "v[232:239]", "v[233:240]", "v[234:241]", + "v[235:242]", "v[236:243]", "v[237:244]", "v[238:245]", "v[239:246]", + "v[240:247]", "v[241:248]", "v[242:249]", "v[243:250]", "v[244:251]", + "v[245:252]", "v[246:253]", "v[247:254]", "v[248:255]" +}; + +static const char *const VGPR512RegNames[] = { + "v[0:15]", "v[1:16]", "v[2:17]", "v[3:18]", "v[4:19]", + "v[5:20]", "v[6:21]", "v[7:22]", "v[8:23]", "v[9:24]", + "v[10:25]", "v[11:26]", "v[12:27]", "v[13:28]", "v[14:29]", + "v[15:30]", "v[16:31]", "v[17:32]", "v[18:33]", "v[19:34]", + "v[20:35]", "v[21:36]", "v[22:37]", "v[23:38]", "v[24:39]", + "v[25:40]", "v[26:41]", "v[27:42]", "v[28:43]", "v[29:44]", + "v[30:45]", "v[31:46]", "v[32:47]", "v[33:48]", "v[34:49]", + "v[35:50]", "v[36:51]", "v[37:52]", "v[38:53]", "v[39:54]", + "v[40:55]", "v[41:56]", "v[42:57]", "v[43:58]", "v[44:59]", + "v[45:60]", "v[46:61]", "v[47:62]", "v[48:63]", "v[49:64]", + "v[50:65]", "v[51:66]", "v[52:67]", "v[53:68]", "v[54:69]", + "v[55:70]", "v[56:71]", "v[57:72]", "v[58:73]", "v[59:74]", + "v[60:75]", "v[61:76]", "v[62:77]", "v[63:78]", "v[64:79]", + "v[65:80]", "v[66:81]", "v[67:82]", "v[68:83]", "v[69:84]", + "v[70:85]", "v[71:86]", "v[72:87]", "v[73:88]", "v[74:89]", + "v[75:90]", "v[76:91]", "v[77:92]", "v[78:93]", "v[79:94]", + "v[80:95]", "v[81:96]", "v[82:97]", "v[83:98]", "v[84:99]", + "v[85:100]", "v[86:101]", "v[87:102]", "v[88:103]", "v[89:104]", + "v[90:105]", "v[91:106]", "v[92:107]", "v[93:108]", "v[94:109]", + "v[95:110]", "v[96:111]", "v[97:112]", "v[98:113]", "v[99:114]", + "v[100:115]", "v[101:116]", "v[102:117]", "v[103:118]", "v[104:119]", + "v[105:120]", "v[106:121]", "v[107:122]", "v[108:123]", "v[109:124]", + "v[110:125]", "v[111:126]", "v[112:127]", "v[113:128]", "v[114:129]", + "v[115:130]", "v[116:131]", "v[117:132]", "v[118:133]", "v[119:134]", + "v[120:135]", "v[121:136]", "v[122:137]", "v[123:138]", "v[124:139]", + "v[125:140]", "v[126:141]", "v[127:142]", "v[128:143]", "v[129:144]", + "v[130:145]", "v[131:146]", "v[132:147]", "v[133:148]", "v[134:149]", + "v[135:150]", "v[136:151]", "v[137:152]", "v[138:153]", "v[139:154]", + "v[140:155]", "v[141:156]", "v[142:157]", "v[143:158]", "v[144:159]", + "v[145:160]", "v[146:161]", "v[147:162]", "v[148:163]", "v[149:164]", + "v[150:165]", "v[151:166]", "v[152:167]", "v[153:168]", "v[154:169]", + "v[155:170]", "v[156:171]", "v[157:172]", "v[158:173]", "v[159:174]", + "v[160:175]", "v[161:176]", "v[162:177]", "v[163:178]", "v[164:179]", + "v[165:180]", "v[166:181]", "v[167:182]", "v[168:183]", "v[169:184]", + "v[170:185]", "v[171:186]", "v[172:187]", "v[173:188]", "v[174:189]", + "v[175:190]", "v[176:191]", "v[177:192]", "v[178:193]", "v[179:194]", + "v[180:195]", "v[181:196]", "v[182:197]", "v[183:198]", "v[184:199]", + "v[185:200]", "v[186:201]", "v[187:202]", "v[188:203]", "v[189:204]", + "v[190:205]", "v[191:206]", "v[192:207]", "v[193:208]", "v[194:209]", + "v[195:210]", "v[196:211]", "v[197:212]", "v[198:213]", "v[199:214]", + "v[200:215]", "v[201:216]", "v[202:217]", "v[203:218]", "v[204:219]", + "v[205:220]", "v[206:221]", "v[207:222]", "v[208:223]", "v[209:224]", + "v[210:225]", "v[211:226]", "v[212:227]", "v[213:228]", "v[214:229]", + 
"v[215:230]", "v[216:231]", "v[217:232]", "v[218:233]", "v[219:234]", + "v[220:235]", "v[221:236]", "v[222:237]", "v[223:238]", "v[224:239]", + "v[225:240]", "v[226:241]", "v[227:242]", "v[228:243]", "v[229:244]", + "v[230:245]", "v[231:246]", "v[232:247]", "v[233:248]", "v[234:249]", + "v[235:250]", "v[236:251]", "v[237:252]", "v[238:253]", "v[239:254]", + "v[240:255]" +}; + +static const char *const SGPR64RegNames[] = { + "s[0:1]", "s[2:3]", "s[4:5]", "s[6:7]", "s[8:9]", "s[10:11]", + "s[12:13]", "s[14:15]", "s[16:17]", "s[18:19]", "s[20:21]", "s[22:23]", + "s[24:25]", "s[26:27]", "s[28:29]", "s[30:31]", "s[32:33]", "s[34:35]", + "s[36:37]", "s[38:39]", "s[40:41]", "s[42:43]", "s[44:45]", "s[46:47]", + "s[48:49]", "s[50:51]", "s[52:53]", "s[54:55]", "s[56:57]", "s[58:59]", + "s[60:61]", "s[62:63]", "s[64:65]", "s[66:67]", "s[68:69]", "s[70:71]", + "s[72:73]", "s[74:75]", "s[76:77]", "s[78:79]", "s[80:81]", "s[82:83]", + "s[84:85]", "s[86:87]", "s[88:89]", "s[90:91]", "s[92:93]", "s[94:95]", + "s[96:97]", "s[98:99]", "s[100:101]", "s[102:103]" +}; + +static const char *const SGPR128RegNames[] = { + "s[0:3]", "s[4:7]", "s[8:11]", "s[12:15]", "s[16:19]", "s[20:23]", + "s[24:27]", "s[28:31]", "s[32:35]", "s[36:39]", "s[40:43]", "s[44:47]", + "s[48:51]", "s[52:55]", "s[56:59]", "s[60:63]", "s[64:67]", "s[68:71]", + "s[72:75]", "s[76:79]", "s[80:83]", "s[84:87]", "s[88:91]", "s[92:95]", + "s[96:99]", "s[100:103]" +}; + +static const char *const SGPR256RegNames[] = { + "s[0:7]", "s[4:11]", "s[8:15]", "s[12:19]", "s[16:23]", + "s[20:27]", "s[24:31]", "s[28:35]", "s[32:39]", "s[36:43]", + "s[40:47]", "s[44:51]", "s[48:55]", "s[52:59]", "s[56:63]", + "s[60:67]", "s[64:71]", "s[68:75]", "s[72:79]", "s[76:83]", + "s[80:87]", "s[84:91]", "s[88:95]", "s[92:99]", "s[96:103]" +}; + +static const char *const SGPR512RegNames[] = { + "s[0:15]", "s[4:19]", "s[8:23]", "s[12:27]", "s[16:31]", "s[20:35]", + "s[24:39]", "s[28:43]", "s[32:47]", "s[36:51]", "s[40:55]", "s[44:59]", + "s[48:63]", "s[52:67]", "s[56:71]", "s[60:75]", "s[64:79]", "s[68:83]", + "s[72:87]", "s[76:91]", "s[80:95]", "s[84:99]", "s[88:103]" +}; + +#endif Index: lib/Target/AMDGPU/CMakeLists.txt =================================================================== --- lib/Target/AMDGPU/CMakeLists.txt +++ lib/Target/AMDGPU/CMakeLists.txt @@ -58,6 +58,7 @@ AMDGPUISelLowering.cpp AMDGPUInstrInfo.cpp AMDGPUPromoteAlloca.cpp + AMDGPURegAsmNames.inc.cpp AMDGPURegisterInfo.cpp AMDGPUUnifyDivergentExitNodes.cpp GCNHazardRecognizer.cpp Index: lib/Target/AMDGPU/SIRegisterInfo.h =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.h +++ lib/Target/AMDGPU/SIRegisterInfo.h @@ -118,6 +118,8 @@ bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS) const; + StringRef getRegAsmName(unsigned Reg) const override; + unsigned getHWRegIndex(unsigned Reg) const { return getEncodingValue(Reg) & 0xff; } Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1104,6 +1104,66 @@ } } +StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const { + #define AMDGPU_REG_ASM_NAMES + #include "AMDGPURegAsmNames.inc.cpp" + + #define REG_RANGE(BeginReg, EndReg, RegTable) \ + if (Reg >= BeginReg && Reg <= EndReg) { \ + unsigned Index = Reg - BeginReg; \ + assert(Index < array_lengthof(RegTable)); \ + return RegTable[Index]; \ + } + + 
+  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
+  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
+  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
+  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
+  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
+            VGPR96RegNames);
+
+  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
+            AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
+            VGPR128RegNames);
+  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
+            AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
+            SGPR128RegNames);
+
+  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
+            AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
+            VGPR256RegNames);
+
+  REG_RANGE(
+    AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
+    AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
+    VGPR512RegNames);
+
+  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
+            AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
+            SGPR256RegNames);
+
+  REG_RANGE(
+    AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
+    AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
+    SGPR512RegNames
+  );
+
+#undef REG_RANGE
+
+  // FIXME: Rename flat_scr so we don't need to special case this.
+  switch (Reg) {
+  case AMDGPU::FLAT_SCR:
+    return "flat_scratch";
+  case AMDGPU::FLAT_SCR_LO:
+    return "flat_scratch_lo";
+  case AMDGPU::FLAT_SCR_HI:
+    return "flat_scratch_hi";
+  default:
+    // For the special named registers the default is fine.
+    return TargetRegisterInfo::getRegAsmName(Reg);
+  }
+}
+
 // FIXME: This is very slow. It might be worth creating a map from physreg to
 // register class.
 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
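The REG_RANGE trick relies on the generated register enum laying out each tuple class contiguously, so a name lookup is one subtraction plus an array index. A self-contained mock of that invariant (not the real AMDGPU enum or tables):

  #include <cassert>

  // Mock of one REG_RANGE expansion: registers Begin..End are contiguous enum
  // values, so Reg - Begin indexes the matching name table directly.
  static const char *const MockNames[] = {"v0", "v1", "v2", "v3"};

  const char *regAsmName(unsigned Reg, unsigned Begin, unsigned End) {
    if (Reg < Begin || Reg > End)
      return nullptr; // in the real code, fall through to the next REG_RANGE
    unsigned Index = Reg - Begin;
    assert(Index < sizeof(MockNames) / sizeof(MockNames[0]));
    return MockNames[Index];
  }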
Index: lib/Target/AMDGPU/VOP3Instructions.td
===================================================================
--- lib/Target/AMDGPU/VOP3Instructions.td
+++ lib/Target/AMDGPU/VOP3Instructions.td
@@ -209,7 +209,10 @@
 }
 
 def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>;
+
+let Constraints = "@earlyclobber $vdst" in {
 def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I32_I32_I64>, int_amdgcn_mqsad_pk_u16_u8>;
+} // End Constraints = "@earlyclobber $vdst"
 
 def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUtrig_preop> {
   let SchedRW = [WriteDouble];
@@ -232,8 +235,10 @@
 
 let SubtargetPredicate = isCIVI in {
 
+let Constraints = "@earlyclobber $vdst" in {
 def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I32_I32_I64>, int_amdgcn_qsad_pk_u16_u8>;
 def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_V4I32>, int_amdgcn_mqsad_u32_u8>;
+} // End Constraints = "@earlyclobber $vdst"
 
 let isCommutable = 1 in {
 def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
Index: lib/Target/Hexagon/HexagonGenMux.cpp
===================================================================
--- lib/Target/Hexagon/HexagonGenMux.cpp
+++ lib/Target/Hexagon/HexagonGenMux.cpp
@@ -235,8 +235,11 @@
       unsigned DR = MI->getOperand(0).getReg();
       if (isRegPair(DR))
         continue;
+      MachineOperand &PredOp = MI->getOperand(1);
+      if (PredOp.isUndef())
+        continue;
 
-      unsigned PR = MI->getOperand(1).getReg();
+      unsigned PR = PredOp.getReg();
       unsigned Idx = I2X.lookup(MI);
       CondsetMap::iterator F = CM.find(DR);
       bool IfTrue = HII->isPredicatedTrue(Opc);
Index: lib/Target/Hexagon/HexagonMachineScheduler.cpp
===================================================================
--- lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -563,40 +563,33 @@
 }
 #endif
 
-/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
-/// of SU, return it, otherwise return null.
-static SUnit *getSingleUnscheduledPred(SUnit *SU) {
-  SUnit *OnlyAvailablePred = nullptr;
-  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
-       I != E; ++I) {
-    SUnit &Pred = *I->getSUnit();
-    if (!Pred.isScheduled) {
-      // We found an available, but not scheduled, predecessor. If it's the
-      // only one we have found, keep track of it... otherwise give up.
-      if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
-        return nullptr;
-      OnlyAvailablePred = &Pred;
-    }
+/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
+/// of SU, return true (we may have duplicates)
+static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
+  if (SU->NumPredsLeft == 0)
+    return false;
+
+  for (auto &Pred : SU->Preds) {
+    // We found an available, but not scheduled, predecessor.
+    if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
+      return false;
   }
-  return OnlyAvailablePred;
+
+  return true;
 }
 
-/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor
-/// of SU, return it, otherwise return null.
-static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
-  SUnit *OnlyAvailableSucc = nullptr;
-  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
-       I != E; ++I) {
-    SUnit &Succ = *I->getSUnit();
-    if (!Succ.isScheduled) {
-      // We found an available, but not scheduled, successor. If it's the
-      // only one we have found, keep track of it... otherwise give up.
-      if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ)
-        return nullptr;
-      OnlyAvailableSucc = &Succ;
-    }
+/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
+/// of SU, return true (we may have duplicates)
+static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
+  if (SU->NumSuccsLeft == 0)
+    return false;
+
+  for (auto &Succ : SU->Succs) {
+    // We found an available, but not scheduled, successor.
+    if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
+      return false;
   }
-  return OnlyAvailableSucc;
+  return true;
 }
 
 // Constants used to denote relative importance of
@@ -673,12 +666,12 @@
       // Count the number of nodes that
       // this node is the sole unscheduled node for.
       for (const SDep &SI : SU->Succs)
-        if (getSingleUnscheduledPred(SI.getSUnit()) == SU)
+        if (isSingleUnscheduledPred(SI.getSUnit(), SU))
          ++NumNodesBlocking;
     } else {
       // How many unscheduled predecessors block this node?
       for (const SDep &PI : SU->Preds)
-        if (getSingleUnscheduledSucc(PI.getSUnit()) == SU)
+        if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
          ++NumNodesBlocking;
     }
     ResCount += (NumNodesBlocking * ScaleTwo);
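The rewritten helpers invert the question the old ones answered: instead of materializing the single unscheduled predecessor and comparing it to SU, they verify that no unscheduled predecessor other than the candidate exists (which also tolerates duplicate edges). The same check over mock types, as a standalone model (not LLVM's SUnit):

  #include <vector>

  struct MockNode {
    std::vector<MockNode *> Preds;
    bool Scheduled = false;
    unsigned PredsLeft = 0; // count of unscheduled predecessors
  };

  // True iff Cand is the only unscheduled predecessor of N, mirroring
  // isSingleUnscheduledPred: bail out when nothing is left; otherwise any
  // other unscheduled predecessor disqualifies N.
  bool isSoleUnscheduledPred(const MockNode *N, const MockNode *Cand) {
    if (N->PredsLeft == 0)
      return false;
    for (const MockNode *P : N->Preds)
      if (!P->Scheduled && P != Cand)
        return false;
    return true;
  }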
Index: lib/Target/PowerPC/PPCBoolRetToInt.cpp
===================================================================
--- lib/Target/PowerPC/PPCBoolRetToInt.cpp
+++ lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -7,15 +7,15 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements converting i1 values to i32 if they could be more
+// This file implements converting i1 values to i32/i64 if they could be more
 // profitably allocated as GPRs rather than CRs. This pass will become totally
 // unnecessary if Register Bank Allocation and Global Instruction Selection ever
 // go upstream.
 //
-// Presently, the pass converts i1 Constants, and Arguments to i32 if the
+// Presently, the pass converts i1 Constants, and Arguments to i32/i64 if the
 // transitive closure of their uses includes only PHINodes, CallInsts, and
 // ReturnInsts. The rationale is that arguments are generally passed and
-// returned in GPRs rather than CRs, so casting them to i32 at the LLVM IR
+// returned in GPRs rather than CRs, so casting them to i32/i64 at the LLVM IR
 // level will actually save casts at the Machine Instruction level.
 //
 // It might be useful to expand this pass to add bit-wise operations to the list
@@ -33,6 +33,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPC.h"
+#include "PPCTargetMachine.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -51,6 +52,7 @@
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Pass.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Casting.h"
 #include <queue>
@@ -87,17 +89,19 @@
     return Defs;
   }
 
-  // Translate a i1 value to an equivalent i32 value:
-  static Value *translate(Value *V) {
-    Type *Int32Ty = Type::getInt32Ty(V->getContext());
+  // Translate a i1 value to an equivalent i32/i64 value:
+  Value *translate(Value *V) {
+    Type *IntTy = ST->isPPC64() ? Type::getInt64Ty(V->getContext())
+                                : Type::getInt32Ty(V->getContext());
+
     if (auto *C = dyn_cast<Constant>(V))
-      return ConstantExpr::getZExt(C, Int32Ty);
+      return ConstantExpr::getZExt(C, IntTy);
     if (auto *P = dyn_cast<PHINode>(V)) {
       // Temporarily set the operands to 0. We'll fix this later in
       // runOnUse.
-      Value *Zero = Constant::getNullValue(Int32Ty);
+      Value *Zero = Constant::getNullValue(IntTy);
       PHINode *Q =
-          PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P);
+          PHINode::Create(IntTy, P->getNumIncomingValues(), P->getName(), P);
       for (unsigned i = 0; i < P->getNumOperands(); ++i)
         Q->addIncoming(Zero, P->getIncomingBlock(i));
       return Q;
@@ -109,7 +113,7 @@
     auto InstPt =
         A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode();
 
-    return new ZExtInst(V, Int32Ty, "", InstPt);
+    return new ZExtInst(V, IntTy, "", InstPt);
   }
 
   typedef SmallPtrSet<PHINode *, 8> PHINodeSet;
@@ -185,6 +189,13 @@
     if (skipFunction(F))
       return false;
 
+    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+    if (!TPC)
+      return false;
+
+    auto &TM = TPC->getTM<PPCTargetMachine>();
+    ST = TM.getSubtargetImpl(F);
+
     PHINodeSet PromotablePHINodes = getPromotablePHINodes(F);
     B2IMap Bool2IntMap;
     bool Changed = false;
@@ -205,7 +216,7 @@
     return Changed;
   }
 
-  static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
+  bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
                 B2IMap &BoolToIntMap) {
     auto Defs = findAllDefs(U);
 
@@ -262,13 +273,16 @@
     AU.addPreserved<DominatorTreeWrapperPass>();
     FunctionPass::getAnalysisUsage(AU);
   }
+
+private:
+  const PPCSubtarget *ST;
 };
 
 } // end anonymous namespace
 
 char PPCBoolRetToInt::ID = 0;
 INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int",
-                "Convert i1 constants to i32 if they are returned",
+                "Convert i1 constants to i32/i64 if they are returned",
                 false, false)
 
 FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); }
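In source-level terms, the translate() rewrite means a bool that only flows through phis, calls, and returns is widened once to the GPR-sized integer, so no per-use CR-to-GPR copies are needed. A hedged sketch of the Constant case only, assuming a subtarget check equivalent to ST->isPPC64() (helper name is ours):

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Type.h"
  using namespace llvm;

  // Widen an i1 constant with a zext constant expression so it can live in a
  // GPR: i64 on ppc64 (per the change above), i32 otherwise.
  static Constant *widenBoolConstant(Constant *C, bool IsPPC64) {
    LLVMContext &Ctx = C->getContext();
    Type *IntTy = IsPPC64 ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx);
    return ConstantExpr::getZExt(C, IntTy);
  }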
Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -2717,6 +2717,40 @@
   dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0))));
   dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
 }
+
+def ByteToWord {
+  dag A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
+  dag A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
+  dag A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8));
+  dag A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8));
+}
+
+def ByteToDWord {
+  dag A0 = (i64 (sext_inreg
+                (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8));
+  dag A1 = (i64 (sext_inreg
+                (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8));
+}
+
+def HWordToWord {
+  dag A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16));
+  dag A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16));
+  dag A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16));
+  dag A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16));
+}
+
+def HWordToDWord {
+  dag A0 = (i64 (sext_inreg
+                (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16));
+  dag A1 = (i64 (sext_inreg
+                (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16));
+}
+
+def WordToDWord {
+  dag A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0))));
+  dag A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2))));
+}
+
 def FltToIntLoad {
   dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A)))));
 }
@@ -2969,4 +3003,21 @@
     (VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC),
             (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC))>;
   }
+  // P9 Altivec instructions that can be used to build vectors.
+  // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
+  // with complexities of existing build vector patterns in this file.
+  let Predicates = [HasP9Altivec] in {
+    def : Pat<(v2i64 (build_vector WordToDWord.A0, WordToDWord.A1)),
+              (v2i64 (VEXTSW2D $A))>;
+    def : Pat<(v2i64 (build_vector HWordToDWord.A0, HWordToDWord.A1)),
+              (v2i64 (VEXTSH2D $A))>;
+    def : Pat<(v4i32 (build_vector HWordToWord.A0, HWordToWord.A1,
+              HWordToWord.A2, HWordToWord.A3)),
+              (v4i32 (VEXTSH2W $A))>;
+    def : Pat<(v4i32 (build_vector ByteToWord.A0, ByteToWord.A1,
+              ByteToWord.A2, ByteToWord.A3)),
+              (v4i32 (VEXTSB2W $A))>;
+    def : Pat<(v2i64 (build_vector ByteToDWord.A0, ByteToDWord.A1)),
+              (v2i64 (VEXTSB2D $A))>;
+  }
 }
Index: lib/Target/WebAssembly/WebAssemblyFastISel.cpp
===================================================================
--- lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -69,6 +69,7 @@
     bool isFIBase() const { return Kind == FrameIndexBase; }
     void setReg(unsigned Reg) {
       assert(isRegBase() && "Invalid base register access!");
+      assert(Base.Reg == 0 && "Overwriting non-zero register");
       Base.Reg = Reg;
     }
     unsigned getReg() const {
@@ -261,22 +262,24 @@
           TmpOffset += CI->getSExtValue() * S;
           break;
         }
+        if (!canFoldAddIntoGEP(U, Op)) {
+          goto unsupported_gep;
+        }
         if (S == 1 && Addr.isRegBase() && Addr.getReg() == 0) {
+          // assert(isa<AddOperator>(Op) && "Must be an add");
+          // assert(isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)) &&
+          //        "Must have a constant operand");
           // An unscaled add of a register. Set it as the new base.
          Addr.setReg(getRegForValue(Op));
          break;
        }
-        if (canFoldAddIntoGEP(U, Op)) {
-          // A compatible add with a constant operand. Fold the constant.
-          ConstantInt *CI =
-              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
-          TmpOffset += CI->getSExtValue() * S;
-          // Iterate on the other operand.
-          Op = cast<AddOperator>(Op)->getOperand(0);
-          continue;
-        }
-        // Unsupported
-        goto unsupported_gep;
+        // A compatible add with a constant operand. Fold the constant.
+        ConstantInt *CI =
+            cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+        TmpOffset += CI->getSExtValue() * S;
+        // Iterate on the other operand.
+        Op = cast<AddOperator>(Op)->getOperand(0);
+        continue;
       }
     }
   }
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -81,6 +81,12 @@
                     " of the loop header PC will be 0)."),
     cl::Hidden);
 
+static cl::opt<bool> MulConstantOptimization(
+    "mul-constant-optimization", cl::init(true),
+    cl::desc("Replace 'mul x, Const' with more effective instructions like "
+             "SHIFT, LEA, etc."),
+    cl::Hidden);
+
 /// Call this when the user attempts to do something unsupported, like
 /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
 /// report_fatal_error, so calling code should attempt to recover without
@@ -31031,6 +31037,77 @@
   }
 }
 
+static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
+                                 EVT VT, SDLoc DL) {
+
+  auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) {
+    SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+                                 DAG.getConstant(Mult, DL, VT));
+    Result = DAG.getNode(ISD::SHL, DL, VT, Result,
+                         DAG.getConstant(Shift, DL, MVT::i8));
+    Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
+                         N->getOperand(0));
+    return Result;
+  };
+
+  auto combineMulMulAddOrSub = [&](bool isAdd) {
+    SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+                                 DAG.getConstant(9, DL, VT));
+    Result = DAG.getNode(ISD::MUL, DL, VT, Result, DAG.getConstant(3, DL, VT));
+    Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
+                         N->getOperand(0));
+    return Result;
+  };
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -81,6 +81,12 @@
                     " of the loop header PC will be 0)."),
           cl::Hidden);

+static cl::opt<bool> MulConstantOptimization(
+    "mul-constant-optimization", cl::init(true),
+    cl::desc("Replace 'mul x, Const' with more efficient instructions like "
+             "SHIFT, LEA, etc."),
+    cl::Hidden);
+
 /// Call this when the user attempts to do something unsupported, like
 /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
 /// report_fatal_error, so calling code should attempt to recover without
@@ -31031,6 +31037,77 @@
   }
 }

+static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
+                                 EVT VT, SDLoc DL) {
+
+  auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) {
+    SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+                                 DAG.getConstant(Mult, DL, VT));
+    Result = DAG.getNode(ISD::SHL, DL, VT, Result,
+                         DAG.getConstant(Shift, DL, MVT::i8));
+    Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
+                         N->getOperand(0));
+    return Result;
+  };
+
+  auto combineMulMulAddOrSub = [&](bool isAdd) {
+    SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+                                 DAG.getConstant(9, DL, VT));
+    Result = DAG.getNode(ISD::MUL, DL, VT, Result, DAG.getConstant(3, DL, VT));
+    Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
+                         N->getOperand(0));
+    return Result;
+  };
+
+  switch (MulAmt) {
+  default:
+    break;
+  case 11:
+    // mul x, 11 => add ((shl (mul x, 5), 1), x)
+    return combineMulShlAddOrSub(5, 1, /*isAdd*/ true);
+  case 21:
+    // mul x, 21 => add ((shl (mul x, 5), 2), x)
+    return combineMulShlAddOrSub(5, 2, /*isAdd*/ true);
+  case 22:
+    // mul x, 22 => add (add ((shl (mul x, 5), 2), x), x)
+    return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+                       combineMulShlAddOrSub(5, 2, /*isAdd*/ true));
+  case 19:
+    // mul x, 19 => sub ((shl (mul x, 5), 2), x)
+    return combineMulShlAddOrSub(5, 2, /*isAdd*/ false);
+  case 13:
+    // mul x, 13 => add ((shl (mul x, 3), 2), x)
+    return combineMulShlAddOrSub(3, 2, /*isAdd*/ true);
+  case 23:
+    // mul x, 23 => sub ((shl (mul x, 3), 3), x)
+    return combineMulShlAddOrSub(3, 3, /*isAdd*/ false);
+  case 14:
+    // mul x, 14 => add (add ((shl (mul x, 3), 2), x), x)
+    return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+                       combineMulShlAddOrSub(3, 2, /*isAdd*/ true));
+  case 26:
+    // mul x, 26 => sub ((mul (mul x, 9), 3), x)
+    return combineMulMulAddOrSub(/*isAdd*/ false);
+  case 28:
+    // mul x, 28 => add ((mul (mul x, 9), 3), x)
+    return combineMulMulAddOrSub(/*isAdd*/ true);
+  case 29:
+    // mul x, 29 => add (add ((mul (mul x, 9), 3), x), x)
+    return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+                       combineMulMulAddOrSub(/*isAdd*/ true));
+  case 30:
+    // mul x, 30 => sub (sub ((shl x, 5), x), x)
+    return DAG.getNode(
+        ISD::SUB, DL, VT,
+        DAG.getNode(ISD::SUB, DL, VT,
+                    DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                                DAG.getConstant(5, DL, MVT::i8)),
+                    N->getOperand(0)),
+        N->getOperand(0));
+  }
+  return SDValue();
+}
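Each switch case above rewrites a multiply into a short LEA/SHL/ADD chain. The case-11 entry, for example, corresponds to the following IR-level decomposition (a sketch; @mul_by_11 is an illustrative name, and on x86 the multiply-by-5 is typically emitted as a single lea):

  define i64 @mul_by_11(i64 %x) {
    ; mul x, 11 => add ((shl (mul x, 5), 1), x), i.e. (x * 5) * 2 + x = 11 * x
    %m5 = mul i64 %x, 5
    %s1 = shl i64 %m5, 1
    %r  = add i64 %s1, %x
    ret i64 %r
  }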
 /// Optimize a single multiply with constant into two operations in order to
 /// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
 static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
@@ -31040,6 +31117,8 @@
   if (DCI.isBeforeLegalize() && VT.isVector())
     return reduceVMULWidth(N, DAG, Subtarget);

+  if (!MulConstantOptimization)
+    return SDValue();
+
   // An imul is usually smaller than the alternative sequence.
   if (DAG.getMachineFunction().getFunction()->optForMinSize())
     return SDValue();
@@ -31095,7 +31174,8 @@
     else
       NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
                            DAG.getConstant(MulAmt2, DL, VT));
-  }
+  } else if (!Subtarget.slowLEA())
+    NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL);

   if (!NewMul) {
     assert(MulAmt != 0 &&
Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -5183,14 +5183,14 @@
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
-      Sched<[WriteFAdd]>;
+      Sched<[WriteFHAdd]>;
   def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))],
-      IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
+      IIC_SSE_HADDSUB_RM>, Sched<[WriteFHAddLd, ReadAfterLd]>;
 }

 multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
                   X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag,
@@ -5200,14 +5200,14 @@
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
-      Sched<[WriteFAdd]>;
+      Sched<[WriteFHAdd]>;
   def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))],
-      IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
+      IIC_SSE_HADDSUB_RM>, Sched<[WriteFHAddLd, ReadAfterLd]>;
 }

 let Predicates = [HasAVX] in {
@@ -5310,7 +5310,7 @@
 // SSSE3 - Packed Binary Operator Instructions
 //===---------------------------------------------------------------------===//

-let Sched = WriteVecALU in {
+let Sched = WritePHAdd in {
 def SSE_PHADDSUBD : OpndItins<
   IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM
 >;
Index: lib/Target/X86/X86SchedHaswell.td
===================================================================
--- lib/Target/X86/X86SchedHaswell.td
+++ lib/Target/X86/X86SchedHaswell.td
@@ -1488,6 +1488,39 @@

 //-- Arithmetic instructions --//

+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+// HADD, HSUB PS/PD
+// x,x / v,v,v.
+def : WriteRes<WriteFHAdd, [HWPort1, HWPort5]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 2];
+}
+
+// x,m / v,v,m.
+def : WriteRes<WriteFHAddLd, [HWPort1, HWPort5, HWPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1, 2, 1];
+}
+
+// PHADD|PHSUB (S) W/D.
+// v <- v,v.
+def : WriteRes<WritePHAdd, [HWPort1, HWPort5]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 2];
+}
+// v <- v,m.
+def : WriteRes<WritePHAddLd, [HWPort1, HWPort5, HWPort23]> {
+  let Latency = 6;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 2, 1];
+}
+
 // PHADD|PHSUB (S) W/D.
 // v <- v,v.
 def WritePHADDSUBr : SchedWriteRes<[HWPort1, HWPort5]> {
Index: lib/Target/X86/X86SchedSandyBridge.td
===================================================================
--- lib/Target/X86/X86SchedSandyBridge.td
+++ lib/Target/X86/X86SchedSandyBridge.td
@@ -157,6 +157,31 @@
   let ResourceCycles = [1, 1, 1, 1];
 }

+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+// HADD, HSUB PS/PD
+// x,x / v,v,v.
+def : WriteRes<WriteFHAdd, [SBPort1]> {
+  let Latency = 3;
+}
+
+// x,m / v,v,m.
+def : WriteRes<WriteFHAddLd, [SBPort1, SBPort23]> {
+  let Latency = 7;
+  let ResourceCycles = [1, 1];
+}
+
+// PHADD|PHSUB (S) W/D.
+// v <- v,v.
+def : WriteRes<WritePHAdd, [SBPort15]>;
+
+// v <- v,m.
+def : WriteRes<WritePHAddLd, [SBPort15, SBPort23]> {
+  let Latency = 5;
+  let ResourceCycles = [1, 1];
+}
+
 // String instructions.
 // Packed Compare Implicit Length Strings, Return Mask
 def : WriteRes {
Index: lib/Target/X86/X86Schedule.td
===================================================================
--- lib/Target/X86/X86Schedule.td
+++ lib/Target/X86/X86Schedule.td
@@ -77,6 +77,10 @@
 // FMA Scheduling helper class.
 class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }

+// Horizontal Add/Sub (float and integer)
+defm WriteFHAdd : X86SchedWritePair;
+defm WritePHAdd : X86SchedWritePair;
+
 // Vector integer operations.
 defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
 defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
Index: lib/Target/X86/X86ScheduleBtVer2.td
===================================================================
--- lib/Target/X86/X86ScheduleBtVer2.td
+++ lib/Target/X86/X86ScheduleBtVer2.td
@@ -319,6 +319,38 @@
   let ResourceCycles = [1, 1];
 }

+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteFHAdd, [JFPU0]> {
+  let Latency = 3;
+}
+
+def : WriteRes<WriteFHAddLd, [JLAGU, JFPU0]> {
+  let Latency = 8;
+}
+
+def : WriteRes<WritePHAdd, [JFPU01]> {
+  let ResourceCycles = [1];
+}
+def : WriteRes<WritePHAddLd, [JLAGU, JFPU01]> {
+  let Latency = 6;
+  let ResourceCycles = [1, 1];
+}
+
+def WriteFHAddY: SchedWriteRes<[JFPU0]> {
+  let Latency = 3;
+  let ResourceCycles = [2];
+}
+def : InstRW<[WriteFHAddY], (instregex "VH(ADD|SUB)P(S|D)Yrr")>;
+
+def WriteFHAddYLd: SchedWriteRes<[JLAGU, JFPU0]> {
+  let Latency = 8;
+  let ResourceCycles = [1, 2];
+}
+def : InstRW<[WriteFHAddYLd], (instregex "VH(ADD|SUB)P(S|D)Yrm")>;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Carry-less multiplication instructions.
 ////////////////////////////////////////////////////////////////////////////////
Index: lib/Target/X86/X86ScheduleSLM.td
===================================================================
--- lib/Target/X86/X86ScheduleSLM.td
+++ lib/Target/X86/X86ScheduleSLM.td
@@ -137,6 +137,33 @@
 defm : SMWriteResPair;
 defm : SMWriteResPair;

+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+// HADD, HSUB PS/PD
+
+def : WriteRes {
+  let Latency = 3;
+  let ResourceCycles = [2];
+}
+
+def : WriteRes {
+  let Latency = 6;
+  let ResourceCycles = [2, 1];
+}
+
+// PHADD|PHSUB (S) W/D.
+def : WriteRes {
+  let Latency = 1;
+  let ResourceCycles = [1];
+}
+
+def : WriteRes {
+  let Latency = 4;
+  let ResourceCycles = [1, 1];
+}
+
 // String instructions.
 // Packed Compare Implicit Length Strings, Return Mask
 def : WriteRes {
Index: lib/Transforms/IPO/SampleProfile.cpp
===================================================================
--- lib/Transforms/IPO/SampleProfile.cpp
+++ lib/Transforms/IPO/SampleProfile.cpp
@@ -695,6 +695,13 @@
         CallSite(I).isIndirectCall())
       for (const auto *FS : findIndirectCallFunctionSamples(*I)) {
         auto CalleeFunctionName = FS->getName();
+        // If it is a recursive call, we do not inline it, as that could bloat
+        // the code exponentially. There is a better way to handle this, e.g.
+        // clone the caller first, and inline the cloned caller if it is
+        // recursive. As LLVM does not inline recursive calls, we will simply
+        // ignore it instead of handling it explicitly.
+        if (CalleeFunctionName == F.getName())
+          continue;
         const char *Reason = "Callee function not available";
         auto R = SymbolMap.find(CalleeFunctionName);
         if (R == SymbolMap.end())
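To make the newly skipped case concrete, here is a sketch of a profiled indirect call whose hottest target is the caller itself (illustrative names, not from the patch); the loop above now ignores such a target rather than treating it as a promotion and inlining candidate:

  define void @work(void ()* %fp) {
  entry:
    ; Suppose the indirect-call samples name @work itself as the hot target.
    ; Promoting and inlining it would recurse, so it is skipped.
    call void %fp()
    ret void
  }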
Index: lib/Transforms/Scalar/GVNSink.cpp
===================================================================
--- lib/Transforms/Scalar/GVNSink.cpp
+++ lib/Transforms/Scalar/GVNSink.cpp
@@ -169,8 +169,8 @@
             NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
            - SplitEdgeCost;
   }
-  bool operator>=(const SinkingInstructionCandidate &Other) const {
-    return Cost >= Other.Cost;
+  bool operator>(const SinkingInstructionCandidate &Other) const {
+    return Cost > Other.Cost;
   }
 };
@@ -745,7 +745,7 @@
   std::stable_sort(
       Candidates.begin(), Candidates.end(),
       [](const SinkingInstructionCandidate &A,
-         const SinkingInstructionCandidate &B) { return A >= B; });
+         const SinkingInstructionCandidate &B) { return A > B; });
   DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C : Candidates)
                                                    dbgs() << "  " << C << "\n";);
Index: lib/Transforms/Scalar/InferAddressSpaces.cpp
===================================================================
--- lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -500,6 +500,7 @@
   }

   // Computes the operands of the new constant expression.
+  bool IsNew = false;
   SmallVector<Constant *, 4> NewOperands;
   for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
     Constant *Operand = CE->getOperand(Index);
@@ -509,6 +510,7 @@
     // bitcast, and getelementptr) do not incur cycles in the data flow graph
     // and (2) this function is called on constant expressions in postorder.
     if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) {
+      IsNew = true;
       NewOperands.push_back(cast<Constant>(NewOperand));
     } else {
       // Otherwise, reuses the old operand.
@@ -516,6 +518,11 @@
     }
   }

+  // If !IsNew, we would replace the Value with itself. However, replaced
+  // values are assumed to be wrapped in an addrspacecast later, so drop it
+  // now.
+  if (!IsNew)
+    return nullptr;
+
   if (CE->getOpcode() == Instruction::GetElementPtr) {
     // Needs to specify the source type while constructing a getelementptr
     // constant expression.
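As an illustration of the early return added above (a sketch with illustrative names), consider a constant GEP expression none of whose operands was mapped to a new address space; cloning it would reproduce the identical expression, so the cloning helper now returns nullptr instead of replacing the value with itself:

  @g = addrspace(3) global [4 x float] zeroinitializer

  define float @f() {
    ; No operand of the constant GEP below has been given a new address
    ; space, so its clone would be the very same constant expression.
    %v = load float, float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @g, i64 0, i64 2)
    ret float %v
  }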
Index: lib/Transforms/Scalar/JumpThreading.cpp
===================================================================
--- lib/Transforms/Scalar/JumpThreading.cpp
+++ lib/Transforms/Scalar/JumpThreading.cpp
@@ -132,7 +132,7 @@
   bool HasProfileData = F.getEntryCount().hasValue();
   if (HasProfileData) {
     LoopInfo LI{DominatorTree(F)};
-    BPI.reset(new BranchProbabilityInfo(F, LI));
+    BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
     BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
   }
@@ -152,7 +152,7 @@
   bool HasProfileData = F.getEntryCount().hasValue();
   if (HasProfileData) {
     LoopInfo LI{DominatorTree(F)};
-    BPI.reset(new BranchProbabilityInfo(F, LI));
+    BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
     BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
   }
Index: test/Analysis/BranchProbabilityInfo/libfunc_call.ll
===================================================================
--- /dev/null
+++ test/Analysis/BranchProbabilityInfo/libfunc_call.ll
@@ -0,0 +1,264 @@
+; RUN: opt < %s -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -analyze -lazy-branch-prob | FileCheck %s
+; RUN: opt < %s -passes='print<branch-prob>' -disable-output 2>&1 | FileCheck %s
+
+declare i32 @strcmp(i8*, i8*)
+declare i32 @strncmp(i8*, i8*, i32)
+declare i32 @strcasecmp(i8*, i8*)
+declare i32 @strncasecmp(i8*, i8*, i32)
+declare i32 @memcmp(i8*, i8*)
+declare i32 @nonstrcmp(i8*, i8*)
+
+
+; Check that the result of strcmp is considered more likely to be nonzero than
+; zero, and equally likely to be (nonzero) positive or negative.
+
+define i32 @test_strcmp_eq(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_strcmp_eq'
+entry:
+  %val = call i32 @strcmp(i8* %p, i8* %q)
+  %cond = icmp eq i32 %val, 0
+  br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50%
+; CHECK: edge entry -> else probability is 0x50000000 / 0x80000000 = 62.50%
+
+then:
+  br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+  br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+  %result = phi i32 [ 0, %then ], [ 1, %else ]
+  ret i32 %result
+}
+
+define i32 @test_strcmp_ne(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_strcmp_ne'
+entry:
+  %val = call i32 @strcmp(i8* %p, i8* %q)
+  %cond = icmp ne i32 %val, 0
+  br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50%
+; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50%
+
+then:
+  br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+  br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+  %result = phi i32 [ 0, %then ], [ 1, %else ]
+  ret i32 %result
+}
+
+define i32 @test_strcmp_sgt(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_strcmp_sgt'
+entry:
+  %val = call i32 @strcmp(i8* %p, i8* %q)
+  %cond = icmp sgt i32 %val, 0
+  br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00%
+
+then:
+  br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+  br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+  %result = phi i32 [ 0, %then ],
[ 1, %else ] + ret i32 %result +} + +define i32 @test_strcmp_slt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strcmp_slt' +entry: + %val = call i32 @strcmp(i8* %p, i8* %q) + %cond = icmp slt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + + +; Similarly check other library functions that have the same behaviour + +define i32 @test_strncmp_sgt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strncmp_sgt' +entry: + %val = call i32 @strncmp(i8* %p, i8* %q, i32 4) + %cond = icmp sgt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_strcasecmp_sgt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strcasecmp_sgt' +entry: + %val = call i32 @strcasecmp(i8* %p, i8* %q) + %cond = icmp sgt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_strncasecmp_sgt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strncasecmp_sgt' +entry: + %val = call i32 @strncasecmp(i8* %p, i8* %q, i32 4) + %cond = icmp sgt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_memcmp_sgt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_memcmp_sgt' +entry: + %val = call i32 @memcmp(i8* %p, i8* %q) + %cond = icmp sgt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + 
+exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + + +; Check that for the result of a call to a non-library function the default +; heuristic is applied, i.e. positive more likely than negative, nonzero more +; likely than zero. + +define i32 @test_nonstrcmp_eq(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_eq' +entry: + %val = call i32 @nonstrcmp(i8* %p, i8* %q) + %cond = icmp eq i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50% +; CHECK: edge entry -> else probability is 0x50000000 / 0x80000000 = 62.50% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_nonstrcmp_ne(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_ne' +entry: + %val = call i32 @nonstrcmp(i8* %p, i8* %q) + %cond = icmp ne i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50% +; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_nonstrcmp_sgt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_sgt' +entry: + %val = call i32 @nonstrcmp(i8* %p, i8* %q) + %cond = icmp sgt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50% +; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} Index: test/CodeGen/AMDGPU/branch-relax-spill.ll =================================================================== --- test/CodeGen/AMDGPU/branch-relax-spill.ll +++ test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -7,110 +7,110 @@ define amdgpu_kernel void @spill(i32 addrspace(1)* %arg, i32 %cnd) #0 { entry: - %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={SGPR0}"() #0 - %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={SGPR1}"() #0 - %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={SGPR2}"() #0 - %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={SGPR3}"() #0 - %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={SGPR4}"() #0 - %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={SGPR5}"() #0 - %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={SGPR6}"() #0 - %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={SGPR7}"() #0 - %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={SGPR8}"() #0 - %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={SGPR9}"() #0 - %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={SGPR10}"() #0 - %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 
0", "={SGPR11}"() #0 - %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={SGPR12}"() #0 - %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={SGPR13}"() #0 - %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={SGPR14}"() #0 - %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={SGPR15}"() #0 - %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={SGPR16}"() #0 - %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={SGPR17}"() #0 - %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={SGPR18}"() #0 - %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={SGPR19}"() #0 - %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={SGPR20}"() #0 - %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={SGPR21}"() #0 - %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={SGPR22}"() #0 - %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={SGPR23}"() #0 - %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={SGPR24}"() #0 - %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={SGPR25}"() #0 - %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={SGPR26}"() #0 - %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={SGPR27}"() #0 - %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={SGPR28}"() #0 - %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={SGPR29}"() #0 - %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={SGPR30}"() #0 - %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={SGPR31}"() #0 - %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={SGPR32}"() #0 - %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={SGPR33}"() #0 - %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={SGPR34}"() #0 - %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={SGPR35}"() #0 - %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={SGPR36}"() #0 - %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={SGPR37}"() #0 - %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={SGPR38}"() #0 - %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={SGPR39}"() #0 - %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={SGPR40}"() #0 - %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={SGPR41}"() #0 - %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={SGPR42}"() #0 - %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={SGPR43}"() #0 - %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={SGPR44}"() #0 - %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={SGPR45}"() #0 - %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={SGPR46}"() #0 - %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={SGPR47}"() #0 - %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={SGPR48}"() #0 - %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={SGPR49}"() #0 - %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={SGPR50}"() #0 - %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={SGPR51}"() #0 - %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={SGPR52}"() #0 - %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={SGPR53}"() #0 - %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={SGPR54}"() #0 - %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={SGPR55}"() #0 - %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={SGPR56}"() #0 - %sgpr57 = tail call 
i32 asm sideeffect "s_mov_b32 s57, 0", "={SGPR57}"() #0 - %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={SGPR58}"() #0 - %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={SGPR59}"() #0 - %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={SGPR60}"() #0 - %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={SGPR61}"() #0 - %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={SGPR62}"() #0 - %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={SGPR63}"() #0 - %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={SGPR64}"() #0 - %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={SGPR65}"() #0 - %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={SGPR66}"() #0 - %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={SGPR67}"() #0 - %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={SGPR68}"() #0 - %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={SGPR69}"() #0 - %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={SGPR70}"() #0 - %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={SGPR71}"() #0 - %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={SGPR72}"() #0 - %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={SGPR73}"() #0 - %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={SGPR74}"() #0 - %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={SGPR75}"() #0 - %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={SGPR76}"() #0 - %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={SGPR77}"() #0 - %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={SGPR78}"() #0 - %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={SGPR79}"() #0 - %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={SGPR80}"() #0 - %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={SGPR81}"() #0 - %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={SGPR82}"() #0 - %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={SGPR83}"() #0 - %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={SGPR84}"() #0 - %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={SGPR85}"() #0 - %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={SGPR86}"() #0 - %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={SGPR87}"() #0 - %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={SGPR88}"() #0 - %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={SGPR89}"() #0 - %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={SGPR90}"() #0 - %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={SGPR91}"() #0 - %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={SGPR92}"() #0 - %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={SGPR93}"() #0 - %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={SGPR94}"() #0 - %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={SGPR95}"() #0 - %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={SGPR96}"() #0 - %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={SGPR97}"() #0 - %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={SGPR98}"() #0 - %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={SGPR99}"() #0 - %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={SGPR100}"() #0 - %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={SGPR101}"() #0 - %sgpr102 = tail call i32 asm sideeffect "s_mov_b32 s102, 
0", "={SGPR102}"() #0 - %sgpr103 = tail call i32 asm sideeffect "s_mov_b32 s103, 0", "={SGPR103}"() #0 + %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0 + %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={s1}"() #0 + %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={s2}"() #0 + %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={s3}"() #0 + %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={s4}"() #0 + %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={s5}"() #0 + %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={s6}"() #0 + %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={s7}"() #0 + %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={s8}"() #0 + %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={s9}"() #0 + %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={s10}"() #0 + %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={s11}"() #0 + %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={s12}"() #0 + %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={s13}"() #0 + %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={s14}"() #0 + %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={s15}"() #0 + %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={s16}"() #0 + %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={s17}"() #0 + %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={s18}"() #0 + %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={s19}"() #0 + %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={s20}"() #0 + %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={s21}"() #0 + %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={s22}"() #0 + %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={s23}"() #0 + %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={s24}"() #0 + %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={s25}"() #0 + %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={s26}"() #0 + %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={s27}"() #0 + %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={s28}"() #0 + %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={s29}"() #0 + %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={s30}"() #0 + %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={s31}"() #0 + %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={s32}"() #0 + %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={s33}"() #0 + %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={s34}"() #0 + %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={s35}"() #0 + %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={s36}"() #0 + %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={s37}"() #0 + %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={s38}"() #0 + %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={s39}"() #0 + %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={s40}"() #0 + %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={s41}"() #0 + %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={s42}"() #0 + %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={s43}"() #0 + %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={s44}"() #0 + %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={s45}"() #0 + %sgpr46 = tail call i32 asm 
sideeffect "s_mov_b32 s46, 0", "={s46}"() #0 + %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={s47}"() #0 + %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={s48}"() #0 + %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={s49}"() #0 + %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={s50}"() #0 + %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={s51}"() #0 + %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={s52}"() #0 + %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={s53}"() #0 + %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={s54}"() #0 + %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={s55}"() #0 + %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={s56}"() #0 + %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={s57}"() #0 + %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={s58}"() #0 + %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={s59}"() #0 + %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={s60}"() #0 + %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={s61}"() #0 + %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={s62}"() #0 + %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={s63}"() #0 + %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={s64}"() #0 + %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={s65}"() #0 + %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={s66}"() #0 + %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={s67}"() #0 + %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={s68}"() #0 + %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={s69}"() #0 + %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={s70}"() #0 + %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={s71}"() #0 + %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={s72}"() #0 + %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={s73}"() #0 + %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={s74}"() #0 + %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={s75}"() #0 + %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={s76}"() #0 + %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={s77}"() #0 + %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={s78}"() #0 + %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={s79}"() #0 + %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={s80}"() #0 + %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={s81}"() #0 + %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={s82}"() #0 + %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={s83}"() #0 + %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={s84}"() #0 + %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={s85}"() #0 + %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={s86}"() #0 + %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={s87}"() #0 + %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={s88}"() #0 + %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={s89}"() #0 + %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={s90}"() #0 + %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={s91}"() #0 + %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={s92}"() #0 + %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 
0", "={s93}"() #0 + %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={s94}"() #0 + %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={s95}"() #0 + %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={s96}"() #0 + %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={s97}"() #0 + %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={s98}"() #0 + %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={s99}"() #0 + %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={s100}"() #0 + %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={s101}"() #0 + %sgpr102 = tail call i32 asm sideeffect "s_mov_b32 s102, 0", "={s102}"() #0 + %sgpr103 = tail call i32 asm sideeffect "s_mov_b32 s103, 0", "={s103}"() #0 %vcc_lo = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={VCC_LO}"() #0 %vcc_hi = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={VCC_HI}"() #0 %cmp = icmp eq i32 %cnd, 0 @@ -126,112 +126,112 @@ br label %bb3 bb3: - tail call void asm sideeffect "; reg use $0", "{SGPR0}"(i32 %sgpr0) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR1}"(i32 %sgpr1) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR2}"(i32 %sgpr2) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR3}"(i32 %sgpr3) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR4}"(i32 %sgpr4) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR5}"(i32 %sgpr5) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR6}"(i32 %sgpr6) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR7}"(i32 %sgpr7) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR8}"(i32 %sgpr8) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR9}"(i32 %sgpr9) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR10}"(i32 %sgpr10) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR11}"(i32 %sgpr11) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR12}"(i32 %sgpr12) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR13}"(i32 %sgpr13) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR14}"(i32 %sgpr14) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR15}"(i32 %sgpr15) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR16}"(i32 %sgpr16) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR17}"(i32 %sgpr17) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR18}"(i32 %sgpr18) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR19}"(i32 %sgpr19) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR20}"(i32 %sgpr20) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR21}"(i32 %sgpr21) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR22}"(i32 %sgpr22) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR23}"(i32 %sgpr23) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR24}"(i32 %sgpr24) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR25}"(i32 %sgpr25) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR26}"(i32 %sgpr26) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR27}"(i32 %sgpr27) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR28}"(i32 %sgpr28) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR29}"(i32 %sgpr29) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR30}"(i32 %sgpr30) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR31}"(i32 %sgpr31) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR32}"(i32 %sgpr32) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR33}"(i32 %sgpr33) #0 - tail call void asm 
sideeffect "; reg use $0", "{SGPR34}"(i32 %sgpr34) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR35}"(i32 %sgpr35) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR36}"(i32 %sgpr36) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR37}"(i32 %sgpr37) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR38}"(i32 %sgpr38) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR39}"(i32 %sgpr39) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR40}"(i32 %sgpr40) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR41}"(i32 %sgpr41) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR42}"(i32 %sgpr42) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR43}"(i32 %sgpr43) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR44}"(i32 %sgpr44) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR45}"(i32 %sgpr45) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR46}"(i32 %sgpr46) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR47}"(i32 %sgpr47) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR48}"(i32 %sgpr48) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR49}"(i32 %sgpr49) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR50}"(i32 %sgpr50) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR51}"(i32 %sgpr51) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR52}"(i32 %sgpr52) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR53}"(i32 %sgpr53) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR54}"(i32 %sgpr54) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR55}"(i32 %sgpr55) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR56}"(i32 %sgpr56) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR57}"(i32 %sgpr57) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR58}"(i32 %sgpr58) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR59}"(i32 %sgpr59) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR60}"(i32 %sgpr60) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR61}"(i32 %sgpr61) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR62}"(i32 %sgpr62) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR63}"(i32 %sgpr63) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR64}"(i32 %sgpr64) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR65}"(i32 %sgpr65) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR66}"(i32 %sgpr66) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR67}"(i32 %sgpr67) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR68}"(i32 %sgpr68) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR69}"(i32 %sgpr69) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR70}"(i32 %sgpr70) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR71}"(i32 %sgpr71) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR72}"(i32 %sgpr72) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR73}"(i32 %sgpr73) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR74}"(i32 %sgpr74) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR75}"(i32 %sgpr75) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR76}"(i32 %sgpr76) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR77}"(i32 %sgpr77) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR78}"(i32 %sgpr78) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR79}"(i32 %sgpr79) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR80}"(i32 %sgpr80) #0 - tail call void asm sideeffect "; reg use $0", 
"{SGPR81}"(i32 %sgpr81) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR82}"(i32 %sgpr82) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR83}"(i32 %sgpr83) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR84}"(i32 %sgpr84) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR85}"(i32 %sgpr85) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR86}"(i32 %sgpr86) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR87}"(i32 %sgpr87) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR88}"(i32 %sgpr88) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR89}"(i32 %sgpr89) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR90}"(i32 %sgpr90) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR91}"(i32 %sgpr91) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR92}"(i32 %sgpr92) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR93}"(i32 %sgpr93) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR94}"(i32 %sgpr94) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR95}"(i32 %sgpr95) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR96}"(i32 %sgpr96) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR97}"(i32 %sgpr97) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR98}"(i32 %sgpr98) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR99}"(i32 %sgpr99) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR100}"(i32 %sgpr100) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR101}"(i32 %sgpr101) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR102}"(i32 %sgpr102) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR103}"(i32 %sgpr103) #0 - tail call void asm sideeffect "; reg use $0", "{VCC_LO}"(i32 %vcc_lo) #0 - tail call void asm sideeffect "; reg use $0", "{VCC_HI}"(i32 %vcc_hi) #0 + tail call void asm sideeffect "; reg use $0", "{s0}"(i32 %sgpr0) #0 + tail call void asm sideeffect "; reg use $0", "{s1}"(i32 %sgpr1) #0 + tail call void asm sideeffect "; reg use $0", "{s2}"(i32 %sgpr2) #0 + tail call void asm sideeffect "; reg use $0", "{s3}"(i32 %sgpr3) #0 + tail call void asm sideeffect "; reg use $0", "{s4}"(i32 %sgpr4) #0 + tail call void asm sideeffect "; reg use $0", "{s5}"(i32 %sgpr5) #0 + tail call void asm sideeffect "; reg use $0", "{s6}"(i32 %sgpr6) #0 + tail call void asm sideeffect "; reg use $0", "{s7}"(i32 %sgpr7) #0 + tail call void asm sideeffect "; reg use $0", "{s8}"(i32 %sgpr8) #0 + tail call void asm sideeffect "; reg use $0", "{s9}"(i32 %sgpr9) #0 + tail call void asm sideeffect "; reg use $0", "{s10}"(i32 %sgpr10) #0 + tail call void asm sideeffect "; reg use $0", "{s11}"(i32 %sgpr11) #0 + tail call void asm sideeffect "; reg use $0", "{s12}"(i32 %sgpr12) #0 + tail call void asm sideeffect "; reg use $0", "{s13}"(i32 %sgpr13) #0 + tail call void asm sideeffect "; reg use $0", "{s14}"(i32 %sgpr14) #0 + tail call void asm sideeffect "; reg use $0", "{s15}"(i32 %sgpr15) #0 + tail call void asm sideeffect "; reg use $0", "{s16}"(i32 %sgpr16) #0 + tail call void asm sideeffect "; reg use $0", "{s17}"(i32 %sgpr17) #0 + tail call void asm sideeffect "; reg use $0", "{s18}"(i32 %sgpr18) #0 + tail call void asm sideeffect "; reg use $0", "{s19}"(i32 %sgpr19) #0 + tail call void asm sideeffect "; reg use $0", "{s20}"(i32 %sgpr20) #0 + tail call void asm sideeffect "; reg use $0", "{s21}"(i32 %sgpr21) #0 + tail call void asm sideeffect "; reg use $0", "{s22}"(i32 %sgpr22) #0 + tail call void asm sideeffect "; reg use $0", "{s23}"(i32 %sgpr23) #0 + tail call 
void asm sideeffect "; reg use $0", "{s24}"(i32 %sgpr24) #0 + tail call void asm sideeffect "; reg use $0", "{s25}"(i32 %sgpr25) #0 + tail call void asm sideeffect "; reg use $0", "{s26}"(i32 %sgpr26) #0 + tail call void asm sideeffect "; reg use $0", "{s27}"(i32 %sgpr27) #0 + tail call void asm sideeffect "; reg use $0", "{s28}"(i32 %sgpr28) #0 + tail call void asm sideeffect "; reg use $0", "{s29}"(i32 %sgpr29) #0 + tail call void asm sideeffect "; reg use $0", "{s30}"(i32 %sgpr30) #0 + tail call void asm sideeffect "; reg use $0", "{s31}"(i32 %sgpr31) #0 + tail call void asm sideeffect "; reg use $0", "{s32}"(i32 %sgpr32) #0 + tail call void asm sideeffect "; reg use $0", "{s33}"(i32 %sgpr33) #0 + tail call void asm sideeffect "; reg use $0", "{s34}"(i32 %sgpr34) #0 + tail call void asm sideeffect "; reg use $0", "{s35}"(i32 %sgpr35) #0 + tail call void asm sideeffect "; reg use $0", "{s36}"(i32 %sgpr36) #0 + tail call void asm sideeffect "; reg use $0", "{s37}"(i32 %sgpr37) #0 + tail call void asm sideeffect "; reg use $0", "{s38}"(i32 %sgpr38) #0 + tail call void asm sideeffect "; reg use $0", "{s39}"(i32 %sgpr39) #0 + tail call void asm sideeffect "; reg use $0", "{s40}"(i32 %sgpr40) #0 + tail call void asm sideeffect "; reg use $0", "{s41}"(i32 %sgpr41) #0 + tail call void asm sideeffect "; reg use $0", "{s42}"(i32 %sgpr42) #0 + tail call void asm sideeffect "; reg use $0", "{s43}"(i32 %sgpr43) #0 + tail call void asm sideeffect "; reg use $0", "{s44}"(i32 %sgpr44) #0 + tail call void asm sideeffect "; reg use $0", "{s45}"(i32 %sgpr45) #0 + tail call void asm sideeffect "; reg use $0", "{s46}"(i32 %sgpr46) #0 + tail call void asm sideeffect "; reg use $0", "{s47}"(i32 %sgpr47) #0 + tail call void asm sideeffect "; reg use $0", "{s48}"(i32 %sgpr48) #0 + tail call void asm sideeffect "; reg use $0", "{s49}"(i32 %sgpr49) #0 + tail call void asm sideeffect "; reg use $0", "{s50}"(i32 %sgpr50) #0 + tail call void asm sideeffect "; reg use $0", "{s51}"(i32 %sgpr51) #0 + tail call void asm sideeffect "; reg use $0", "{s52}"(i32 %sgpr52) #0 + tail call void asm sideeffect "; reg use $0", "{s53}"(i32 %sgpr53) #0 + tail call void asm sideeffect "; reg use $0", "{s54}"(i32 %sgpr54) #0 + tail call void asm sideeffect "; reg use $0", "{s55}"(i32 %sgpr55) #0 + tail call void asm sideeffect "; reg use $0", "{s56}"(i32 %sgpr56) #0 + tail call void asm sideeffect "; reg use $0", "{s57}"(i32 %sgpr57) #0 + tail call void asm sideeffect "; reg use $0", "{s58}"(i32 %sgpr58) #0 + tail call void asm sideeffect "; reg use $0", "{s59}"(i32 %sgpr59) #0 + tail call void asm sideeffect "; reg use $0", "{s60}"(i32 %sgpr60) #0 + tail call void asm sideeffect "; reg use $0", "{s61}"(i32 %sgpr61) #0 + tail call void asm sideeffect "; reg use $0", "{s62}"(i32 %sgpr62) #0 + tail call void asm sideeffect "; reg use $0", "{s63}"(i32 %sgpr63) #0 + tail call void asm sideeffect "; reg use $0", "{s64}"(i32 %sgpr64) #0 + tail call void asm sideeffect "; reg use $0", "{s65}"(i32 %sgpr65) #0 + tail call void asm sideeffect "; reg use $0", "{s66}"(i32 %sgpr66) #0 + tail call void asm sideeffect "; reg use $0", "{s67}"(i32 %sgpr67) #0 + tail call void asm sideeffect "; reg use $0", "{s68}"(i32 %sgpr68) #0 + tail call void asm sideeffect "; reg use $0", "{s69}"(i32 %sgpr69) #0 + tail call void asm sideeffect "; reg use $0", "{s70}"(i32 %sgpr70) #0 + tail call void asm sideeffect "; reg use $0", "{s71}"(i32 %sgpr71) #0 + tail call void asm sideeffect "; reg use $0", "{s72}"(i32 %sgpr72) #0 + tail call void asm sideeffect "; reg 
use $0", "{s73}"(i32 %sgpr73) #0 + tail call void asm sideeffect "; reg use $0", "{s74}"(i32 %sgpr74) #0 + tail call void asm sideeffect "; reg use $0", "{s75}"(i32 %sgpr75) #0 + tail call void asm sideeffect "; reg use $0", "{s76}"(i32 %sgpr76) #0 + tail call void asm sideeffect "; reg use $0", "{s77}"(i32 %sgpr77) #0 + tail call void asm sideeffect "; reg use $0", "{s78}"(i32 %sgpr78) #0 + tail call void asm sideeffect "; reg use $0", "{s79}"(i32 %sgpr79) #0 + tail call void asm sideeffect "; reg use $0", "{s80}"(i32 %sgpr80) #0 + tail call void asm sideeffect "; reg use $0", "{s81}"(i32 %sgpr81) #0 + tail call void asm sideeffect "; reg use $0", "{s82}"(i32 %sgpr82) #0 + tail call void asm sideeffect "; reg use $0", "{s83}"(i32 %sgpr83) #0 + tail call void asm sideeffect "; reg use $0", "{s84}"(i32 %sgpr84) #0 + tail call void asm sideeffect "; reg use $0", "{s85}"(i32 %sgpr85) #0 + tail call void asm sideeffect "; reg use $0", "{s86}"(i32 %sgpr86) #0 + tail call void asm sideeffect "; reg use $0", "{s87}"(i32 %sgpr87) #0 + tail call void asm sideeffect "; reg use $0", "{s88}"(i32 %sgpr88) #0 + tail call void asm sideeffect "; reg use $0", "{s89}"(i32 %sgpr89) #0 + tail call void asm sideeffect "; reg use $0", "{s90}"(i32 %sgpr90) #0 + tail call void asm sideeffect "; reg use $0", "{s91}"(i32 %sgpr91) #0 + tail call void asm sideeffect "; reg use $0", "{s92}"(i32 %sgpr92) #0 + tail call void asm sideeffect "; reg use $0", "{s93}"(i32 %sgpr93) #0 + tail call void asm sideeffect "; reg use $0", "{s94}"(i32 %sgpr94) #0 + tail call void asm sideeffect "; reg use $0", "{s95}"(i32 %sgpr95) #0 + tail call void asm sideeffect "; reg use $0", "{s96}"(i32 %sgpr96) #0 + tail call void asm sideeffect "; reg use $0", "{s97}"(i32 %sgpr97) #0 + tail call void asm sideeffect "; reg use $0", "{s98}"(i32 %sgpr98) #0 + tail call void asm sideeffect "; reg use $0", "{s99}"(i32 %sgpr99) #0 + tail call void asm sideeffect "; reg use $0", "{s100}"(i32 %sgpr100) #0 + tail call void asm sideeffect "; reg use $0", "{s101}"(i32 %sgpr101) #0 + tail call void asm sideeffect "; reg use $0", "{s102}"(i32 %sgpr102) #0 + tail call void asm sideeffect "; reg use $0", "{s103}"(i32 %sgpr103) #0 + tail call void asm sideeffect "; reg use $0", "{vcc_lo}"(i32 %vcc_lo) #0 + tail call void asm sideeffect "; reg use $0", "{vcc_hi}"(i32 %vcc_hi) #0 ret void } Index: test/CodeGen/AMDGPU/exceed-max-sgprs.ll =================================================================== --- test/CodeGen/AMDGPU/exceed-max-sgprs.ll +++ test/CodeGen/AMDGPU/exceed-max-sgprs.ll @@ -2,97 +2,97 @@ ; ERROR: error: scalar registers limit of 104 exceeded (106) in use_too_many_sgprs_tahiti define amdgpu_kernel void @use_too_many_sgprs_tahiti() #0 { - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", 
"~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () - call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" () - call void asm sideeffect "", "~{VCC}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () + call void asm sideeffect "", "~{s[96:103]}" () + call void asm sideeffect "", "~{vcc}" () ret void } ; ERROR: error: scalar registers limit of 104 exceeded (106) in use_too_many_sgprs_bonaire define amdgpu_kernel void @use_too_many_sgprs_bonaire() #1 { - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () - call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" () - call void asm sideeffect "", "~{VCC}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () + call void asm sideeffect "", "~{s[96:103]}" () + call void asm sideeffect "", "~{vcc}" () ret void } ; ERROR: error: scalar registers limit of 104 exceeded (108) in use_too_many_sgprs_bonaire_flat_scr define amdgpu_kernel void @use_too_many_sgprs_bonaire_flat_scr() #1 { - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", 
"~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () - call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" () - call void asm sideeffect "", "~{VCC}" () - call void asm sideeffect "", "~{FLAT_SCR}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () + call void asm sideeffect "", "~{s[96:103]}" () + call void asm sideeffect "", "~{vcc}" () + call void asm sideeffect "", "~{flat_scratch}" () ret void } ; ERROR: error: scalar registers limit of 96 exceeded (98) in use_too_many_sgprs_iceland define amdgpu_kernel void @use_too_many_sgprs_iceland() #2 { - call void asm sideeffect "", "~{VCC}" () - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () + call void asm sideeffect "", "~{vcc}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm 
sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () ret void } ; ERROR: error: addressable scalar registers limit of 102 exceeded (103) in use_too_many_sgprs_fiji define amdgpu_kernel void @use_too_many_sgprs_fiji() #3 { - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () - call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99}" () - call void asm sideeffect "", "~{SGPR100_SGPR101}" () - call void asm sideeffect "", "~{SGPR102}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () + call void asm sideeffect "", "~{s[96:99]}" () + call void asm sideeffect "", "~{s[100:101]}" () + call void asm sideeffect "", "~{s102}" () ret void } Index: test/CodeGen/AMDGPU/flat-scratch-reg.ll =================================================================== --- test/CodeGen/AMDGPU/flat-scratch-reg.ll +++ test/CodeGen/AMDGPU/flat-scratch-reg.ll @@ -21,7 +21,7 @@ ; VI-XNACK: ; NumSgprs: 12 define amdgpu_kernel void @no_vcc_no_flat() { entry: - call void asm sideeffect "", "~{SGPR7}"() + call void asm sideeffect "", "~{s7}"() ret void } @@ -35,7 +35,7 @@ ; VI-XNACK: ; NumSgprs: 12 define amdgpu_kernel void @vcc_no_flat() { entry: - call void asm sideeffect "", "~{SGPR7},~{VCC}"() + call void asm sideeffect "", "~{s7},~{vcc}"() ret void } @@ -52,7 +52,7 @@ ; HSA-VI-XNACK: ; NumSgprs: 14 define amdgpu_kernel void @no_vcc_flat() { entry: - call void asm sideeffect "", "~{SGPR7},~{FLAT_SCR}"() + call void asm sideeffect "", "~{s7},~{flat_scratch}"() ret void } @@ -68,7 +68,7 @@ ; HSA-VI-XNACK: ; NumSgprs: 14 define amdgpu_kernel void @vcc_flat() { entry: - call void asm sideeffect "", "~{SGPR7},~{VCC},~{FLAT_SCR}"() + call void asm sideeffect "", 
"~{s7},~{vcc},~{flat_scratch}"() ret void } @@ -81,7 +81,7 @@ ; VI-XNACK: NumSgprs: 6 define amdgpu_kernel void @use_flat_scr() #0 { entry: - call void asm sideeffect "; clobber ", "~{FLAT_SCR}"() + call void asm sideeffect "; clobber ", "~{flat_scratch}"() ret void } @@ -91,7 +91,7 @@ ; VI-XNACK: NumSgprs: 6 define amdgpu_kernel void @use_flat_scr_lo() #0 { entry: - call void asm sideeffect "; clobber ", "~{FLAT_SCR_LO}"() + call void asm sideeffect "; clobber ", "~{flat_scratch_lo}"() ret void } @@ -101,7 +101,7 @@ ; VI-XNACK: NumSgprs: 6 define amdgpu_kernel void @use_flat_scr_hi() #0 { entry: - call void asm sideeffect "; clobber ", "~{FLAT_SCR_HI}"() + call void asm sideeffect "; clobber ", "~{flat_scratch_hi}"() ret void } Index: test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll =================================================================== --- test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll +++ test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll @@ -5,40 +5,40 @@ ; GCN: ; illegal copy v1 to s9 define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_i32() #0 { - %vgpr = call i32 asm sideeffect "; def $0", "=${VGPR1}"() - call void asm sideeffect "; use $0", "${SGPR9}"(i32 %vgpr) + %vgpr = call i32 asm sideeffect "; def $0", "=${v1}"() + call void asm sideeffect "; use $0", "${s9}"(i32 %vgpr) ret void } ; ERR: error: :0:0: in function illegal_vgpr_to_sgpr_copy_v2i32 void (): illegal SGPR to VGPR copy ; GCN: ; illegal copy v[0:1] to s[10:11] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v2i32() #0 { - %vgpr = call <2 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1}"() - call void asm sideeffect "; use $0", "${SGPR10_SGPR11}"(<2 x i32> %vgpr) + %vgpr = call <2 x i32> asm sideeffect "; def $0", "=${v[0:1]}"() + call void asm sideeffect "; use $0", "${s[10:11]}"(<2 x i32> %vgpr) ret void } ; ERR: error: :0:0: in function illegal_vgpr_to_sgpr_copy_v4i32 void (): illegal SGPR to VGPR copy ; GCN: ; illegal copy v[0:3] to s[8:11] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v4i32() #0 { - %vgpr = call <4 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3}"() - call void asm sideeffect "; use $0", "${SGPR8_SGPR9_SGPR10_SGPR11}"(<4 x i32> %vgpr) + %vgpr = call <4 x i32> asm sideeffect "; def $0", "=${v[0:3]}"() + call void asm sideeffect "; use $0", "${s[8:11]}"(<4 x i32> %vgpr) ret void } ; ERR: error: :0:0: in function illegal_vgpr_to_sgpr_copy_v8i32 void (): illegal SGPR to VGPR copy ; GCN: ; illegal copy v[0:7] to s[8:15] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v8i32() #0 { - %vgpr = call <8 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}"() - call void asm sideeffect "; use $0", "${SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}"(<8 x i32> %vgpr) + %vgpr = call <8 x i32> asm sideeffect "; def $0", "=${v[0:7]}"() + call void asm sideeffect "; use $0", "${s[8:15]}"(<8 x i32> %vgpr) ret void } ; ERR error: :0:0: in function illegal_vgpr_to_sgpr_copy_v16i32 void (): illegal SGPR to VGPR copy ; GCN: ; illegal copy v[0:15] to s[16:31] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v16i32() #0 { - %vgpr = call <16 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}"() - call void asm sideeffect "; use $0", "${SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23_SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}"(<16 x i32> %vgpr) + %vgpr = call <16 x i32> asm sideeffect "; def $0", "=${v[0:15]}"() + call void 
asm sideeffect "; use $0", "${s[16:31]}"(<16 x i32> %vgpr) ret void } Index: test/CodeGen/AMDGPU/indirect-addressing-si.ll =================================================================== --- test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -379,7 +379,7 @@ %idx0 = load volatile i32, i32 addrspace(1)* %gep %idx1 = add i32 %idx0, 1 %val0 = extractelement <4 x i32> , i32 %idx0 - %live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={SGPR4}" () + %live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={s4}" () %val1 = extractelement <4 x i32> , i32 %idx1 store volatile i32 %val0, i32 addrspace(1)* %out0 store volatile i32 %val1, i32 addrspace(1)* %out0 Index: test/CodeGen/AMDGPU/inline-asm.ll =================================================================== --- test/CodeGen/AMDGPU/inline-asm.ll +++ test/CodeGen/AMDGPU/inline-asm.ll @@ -193,7 +193,7 @@ ; CHECK: use v[0:1] define amdgpu_kernel void @i64_imm_input_phys_vgpr() { entry: - call void asm sideeffect "; use $0 ", "{VGPR0_VGPR1}"(i64 123456) + call void asm sideeffect "; use $0 ", "{v[0:1]}"(i64 123456) ret void } @@ -202,7 +202,7 @@ ; CHECK: ; use v0 define amdgpu_kernel void @i1_imm_input_phys_vgpr() { entry: - call void asm sideeffect "; use $0 ", "{VGPR0}"(i1 true) + call void asm sideeffect "; use $0 ", "{v0}"(i1 true) ret void } @@ -215,7 +215,7 @@ define amdgpu_kernel void @i1_input_phys_vgpr() { entry: %val = load i1, i1 addrspace(1)* undef - call void asm sideeffect "; use $0 ", "{VGPR0}"(i1 %val) + call void asm sideeffect "; use $0 ", "{v0}"(i1 %val) ret void } @@ -229,7 +229,7 @@ entry: %val0 = load volatile i1, i1 addrspace(1)* undef %val1 = load volatile i1, i1 addrspace(1)* undef - call void asm sideeffect "; use $0 $1 ", "{VGPR0}, {VGPR1}"(i1 %val0, i1 %val1) + call void asm sideeffect "; use $0 $1 ", "{v0}, {v1}"(i1 %val0, i1 %val1) ret void } @@ -240,8 +240,8 @@ ; CHECK: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1 define amdgpu_kernel void @muliple_def_phys_vgpr() { entry: - %def0 = call i32 asm sideeffect "; def $0 ", "={VGPR0}"() - %def1 = call i32 asm sideeffect "; def $0 ", "={VGPR0}"() + %def0 = call i32 asm sideeffect "; def $0 ", "={v0}"() + %def1 = call i32 asm sideeffect "; def $0 ", "={v0}"() %add = shl i32 %def0, %def1 store i32 %add, i32 addrspace(1)* undef ret void Index: test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll @@ -4,18 +4,28 @@ declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) #0 ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8: -; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v5, v1 +; GCN-DAG: v_mov_b32_e32 v4, v0 define amdgpu_kernel void @v_mqsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) { - %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0 - store i64 %result, i64 addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) #0 + %tmp1 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) #0 + %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) #0 + store i64 %tmp2, i64 addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8_non_immediate: -; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], 
s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: v_mqsad_pk_u16_u8 v[0:1], v[2:3], v4, v[6:7] +; GCN-DAG: v_mov_b32_e32 v3, v1 +; GCN-DAG: v_mov_b32_e32 v2, v0 define amdgpu_kernel void @v_mqsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) { - %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0 - store i64 %result, i64 addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) #0 + %tmp3 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) #0 + %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) #0 + store i64 %tmp4, i64 addrspace(1)* %out, align 4 ret void } Index: test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll @@ -3,45 +3,56 @@ declare <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64, i32, <4 x i32>) #0 -; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_non_inline_constant: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void @v_mqsad_u32_u8_use_non_inline_constant(<4 x i32> addrspace(1)* %out, i64 %src) { - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 100, <4 x i32> ) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 +; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate: +; GCN-DAG: v_mov_b32_e32 v0, v2 +; GCN-DAG: v_mov_b32_e32 v1, v3 +; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] +define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) { + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> ) #0 + %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_u32_u8_non_immediate: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v0, v2 +; GCN-DAG: v_mov_b32_e32 v1, v3 +; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] define amdgpu_kernel void @v_mqsad_u32_u8_non_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> %b) { - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %b) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 - ret void -} - -; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) { - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> ) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %b) #0 + %tmp3 = call <4 x i32> asm ";; force 
constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_fp_immediate: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v0, v2 +; GCN-DAG: v_mov_b32_e32 v1, v3 +; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] define amdgpu_kernel void @v_mqsad_u32_u8_inline_fp_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) { - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> ) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> ) #0 + %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_sgpr_vgpr: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v0, v2 +; GCN-DAG: v_mov_b32_e32 v1, v3 +; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] define amdgpu_kernel void @v_mqsad_u32_u8_use_sgpr_vgpr(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> addrspace(1)* %input) { %in = load <4 x i32>, <4 x i32> addrspace(1) * %input - - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %in) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %in) #0 + %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 ret void } Index: test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll @@ -4,18 +4,28 @@ declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) #0 ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8: -; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v5, v1 +; GCN-DAG: v_mov_b32_e32 v4, v0 define amdgpu_kernel void @v_qsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) { - %result= call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0 - store i64 %result, i64 addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) #0 + %tmp1 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) #0 + %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) #0 + store i64 %tmp2, i64 addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8_non_immediate: -; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: v_qsad_pk_u16_u8 v[0:1], v[2:3], v4, v[6:7] +; GCN-DAG: v_mov_b32_e32 v3, v1 +; GCN-DAG: v_mov_b32_e32 v2, v0 define amdgpu_kernel void @v_qsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) { - 
%result= call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0 - store i64 %result, i64 addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) #0 + %tmp3 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) #0 + %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) #0 + store i64 %tmp4, i64 addrspace(1)* %out, align 4 ret void } Index: test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll =================================================================== --- test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -608,11 +608,11 @@ ; GCN: ;;#ASMSTART ; GCN: ; use s[0:1] define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 { - call void asm sideeffect "", "~{VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}" () #0 - call void asm sideeffect "", "~{VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}" () #0 - call void asm sideeffect "", "~{VGPR16_VGPR17_VGPR18_VGPR19}"() #0 - call void asm sideeffect "", "~{VGPR20_VGPR21}"() #0 - call void asm sideeffect "", "~{VGPR22}"() #0 + call void asm sideeffect "", "~{v[0:7]}" () #0 + call void asm sideeffect "", "~{v[8:15]}" () #0 + call void asm sideeffect "", "~{v[16:19]}"() #0 + call void asm sideeffect "", "~{v[20:21]}"() #0 + call void asm sideeffect "", "~{v22}"() #0 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 Index: test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll =================================================================== --- test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll +++ test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll @@ -25,50 +25,50 @@ ; SMEM: s_dcache_wb ; ALL: s_endpgm define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) { - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () - call void asm sideeffect "", "~{VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}" () - call void asm sideeffect "", "~{VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}" () - call void asm sideeffect "", "~{VGPR16_VGPR17_VGPR18_VGPR19_VGPR20_VGPR21_VGPR22_VGPR23}" () - call void asm sideeffect "", "~{VGPR24_VGPR25_VGPR26_VGPR27_VGPR28_VGPR29_VGPR30_VGPR31}" () - call void asm sideeffect "", 
"~{VGPR32_VGPR33_VGPR34_VGPR35_VGPR36_VGPR37_VGPR38_VGPR39}" () - call void asm sideeffect "", "~{VGPR40_VGPR41_VGPR42_VGPR43_VGPR44_VGPR45_VGPR46_VGPR47}" () - call void asm sideeffect "", "~{VGPR48_VGPR49_VGPR50_VGPR51_VGPR52_VGPR53_VGPR54_VGPR55}" () - call void asm sideeffect "", "~{VGPR56_VGPR57_VGPR58_VGPR59_VGPR60_VGPR61_VGPR62_VGPR63}" () - call void asm sideeffect "", "~{VGPR64_VGPR65_VGPR66_VGPR67_VGPR68_VGPR69_VGPR70_VGPR71}" () - call void asm sideeffect "", "~{VGPR72_VGPR73_VGPR74_VGPR75_VGPR76_VGPR77_VGPR78_VGPR79}" () - call void asm sideeffect "", "~{VGPR80_VGPR81_VGPR82_VGPR83_VGPR84_VGPR85_VGPR86_VGPR87}" () - call void asm sideeffect "", "~{VGPR88_VGPR89_VGPR90_VGPR91_VGPR92_VGPR93_VGPR94_VGPR95}" () - call void asm sideeffect "", "~{VGPR96_VGPR97_VGPR98_VGPR99_VGPR100_VGPR101_VGPR102_VGPR103}" () - call void asm sideeffect "", "~{VGPR104_VGPR105_VGPR106_VGPR107_VGPR108_VGPR109_VGPR110_VGPR111}" () - call void asm sideeffect "", "~{VGPR112_VGPR113_VGPR114_VGPR115_VGPR116_VGPR117_VGPR118_VGPR119}" () - call void asm sideeffect "", "~{VGPR120_VGPR121_VGPR122_VGPR123_VGPR124_VGPR125_VGPR126_VGPR127}" () - call void asm sideeffect "", "~{VGPR128_VGPR129_VGPR130_VGPR131_VGPR132_VGPR133_VGPR134_VGPR135}" () - call void asm sideeffect "", "~{VGPR136_VGPR137_VGPR138_VGPR139_VGPR140_VGPR141_VGPR142_VGPR143}" () - call void asm sideeffect "", "~{VGPR144_VGPR145_VGPR146_VGPR147_VGPR148_VGPR149_VGPR150_VGPR151}" () - call void asm sideeffect "", "~{VGPR152_VGPR153_VGPR154_VGPR155_VGPR156_VGPR157_VGPR158_VGPR159}" () - call void asm sideeffect "", "~{VGPR160_VGPR161_VGPR162_VGPR163_VGPR164_VGPR165_VGPR166_VGPR167}" () - call void asm sideeffect "", "~{VGPR168_VGPR169_VGPR170_VGPR171_VGPR172_VGPR173_VGPR174_VGPR175}" () - call void asm sideeffect "", "~{VGPR176_VGPR177_VGPR178_VGPR179_VGPR180_VGPR181_VGPR182_VGPR183}" () - call void asm sideeffect "", "~{VGPR184_VGPR185_VGPR186_VGPR187_VGPR188_VGPR189_VGPR190_VGPR191}" () - call void asm sideeffect "", "~{VGPR192_VGPR193_VGPR194_VGPR195_VGPR196_VGPR197_VGPR198_VGPR199}" () - call void asm sideeffect "", "~{VGPR200_VGPR201_VGPR202_VGPR203_VGPR204_VGPR205_VGPR206_VGPR207}" () - call void asm sideeffect "", "~{VGPR208_VGPR209_VGPR210_VGPR211_VGPR212_VGPR213_VGPR214_VGPR215}" () - call void asm sideeffect "", "~{VGPR216_VGPR217_VGPR218_VGPR219_VGPR220_VGPR221_VGPR222_VGPR223}" () - call void asm sideeffect "", "~{VGPR224_VGPR225_VGPR226_VGPR227_VGPR228_VGPR229_VGPR230_VGPR231}" () - call void asm sideeffect "", "~{VGPR232_VGPR233_VGPR234_VGPR235_VGPR236_VGPR237_VGPR238_VGPR239}" () - call void asm sideeffect "", "~{VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247}" () - call void asm sideeffect "", "~{VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () + call void asm sideeffect "", "~{v[0:7]}" () + call void asm sideeffect "", "~{v[8:15]}" () + call void asm sideeffect "", "~{v[16:23]}" () + call void asm sideeffect "", "~{v[24:31]}" () + call 
void asm sideeffect "", "~{v[32:39]}" () + call void asm sideeffect "", "~{v[40:47]}" () + call void asm sideeffect "", "~{v[48:55]}" () + call void asm sideeffect "", "~{v[56:63]}" () + call void asm sideeffect "", "~{v[64:71]}" () + call void asm sideeffect "", "~{v[72:79]}" () + call void asm sideeffect "", "~{v[80:87]}" () + call void asm sideeffect "", "~{v[88:95]}" () + call void asm sideeffect "", "~{v[96:103]}" () + call void asm sideeffect "", "~{v[104:111]}" () + call void asm sideeffect "", "~{v[112:119]}" () + call void asm sideeffect "", "~{v[120:127]}" () + call void asm sideeffect "", "~{v[128:135]}" () + call void asm sideeffect "", "~{v[136:143]}" () + call void asm sideeffect "", "~{v[144:151]}" () + call void asm sideeffect "", "~{v[152:159]}" () + call void asm sideeffect "", "~{v[160:167]}" () + call void asm sideeffect "", "~{v[168:175]}" () + call void asm sideeffect "", "~{v[176:183]}" () + call void asm sideeffect "", "~{v[184:191]}" () + call void asm sideeffect "", "~{v[192:199]}" () + call void asm sideeffect "", "~{v[200:207]}" () + call void asm sideeffect "", "~{v[208:215]}" () + call void asm sideeffect "", "~{v[216:223]}" () + call void asm sideeffect "", "~{v[224:231]}" () + call void asm sideeffect "", "~{v[232:239]}" () + call void asm sideeffect "", "~{v[240:247]}" () + call void asm sideeffect "", "~{v[248:255]}" () store i32 %in, i32 addrspace(1)* %out ret void Index: test/CodeGen/AMDGPU/skip-if-dead.ll =================================================================== --- test/CodeGen/AMDGPU/skip-if-dead.ll +++ test/CodeGen/AMDGPU/skip-if-dead.ll @@ -79,7 +79,7 @@ ; CHECK-NEXT: s_endpgm define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 { call void @llvm.AMDGPU.kill(float %x) - %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={VGPR7}"() + %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"() call void @llvm.AMDGPU.kill(float %y) ret void } @@ -128,7 +128,7 @@ v_nop_e64 v_nop_e64 v_nop_e64 - v_nop_e64", "={VGPR7}"() + v_nop_e64", "={v7}"() call void @llvm.AMDGPU.kill(float %var) br label %exit @@ -186,11 +186,11 @@ v_nop_e64 v_nop_e64 v_nop_e64 - v_nop_e64", "={VGPR7}"() - %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={VGPR8}"() + v_nop_e64", "={v7}"() + %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"() call void @llvm.AMDGPU.kill(float %var) store volatile float %live.across, float addrspace(1)* undef - %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={VGPR9}"() + %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"() br label %exit exit: @@ -242,7 +242,7 @@ v_nop_e64 v_nop_e64 v_nop_e64 - v_nop_e64", "={VGPR7}"() + v_nop_e64", "={v7}"() call void @llvm.AMDGPU.kill(float %var) %vgpr = load volatile i32, i32 addrspace(1)* undef %loop.cond = icmp eq i32 %vgpr, 0 Index: test/CodeGen/AMDGPU/spill-scavenge-offset.ll =================================================================== --- test/CodeGen/AMDGPU/spill-scavenge-offset.ll +++ test/CodeGen/AMDGPU/spill-scavenge-offset.ll @@ -20,13 +20,13 @@ %a = load <1280 x i32>, <1280 x i32> addrspace(1)* %aptr ; mark most VGPR registers as used to increase register pressure - call void asm sideeffect "", "~{VGPR4},~{VGPR8},~{VGPR12},~{VGPR16},~{VGPR20},~{VGPR24},~{VGPR28},~{VGPR32}" () - call void asm sideeffect "", "~{VGPR36},~{VGPR40},~{VGPR44},~{VGPR48},~{VGPR52},~{VGPR56},~{VGPR60},~{VGPR64}" () - call void asm sideeffect "", 
"~{VGPR68},~{VGPR72},~{VGPR76},~{VGPR80},~{VGPR84},~{VGPR88},~{VGPR92},~{VGPR96}" () - call void asm sideeffect "", "~{VGPR100},~{VGPR104},~{VGPR108},~{VGPR112},~{VGPR116},~{VGPR120},~{VGPR124},~{VGPR128}" () - call void asm sideeffect "", "~{VGPR132},~{VGPR136},~{VGPR140},~{VGPR144},~{VGPR148},~{VGPR152},~{VGPR156},~{VGPR160}" () - call void asm sideeffect "", "~{VGPR164},~{VGPR168},~{VGPR172},~{VGPR176},~{VGPR180},~{VGPR184},~{VGPR188},~{VGPR192}" () - call void asm sideeffect "", "~{VGPR196},~{VGPR200},~{VGPR204},~{VGPR208},~{VGPR212},~{VGPR216},~{VGPR220},~{VGPR224}" () + call void asm sideeffect "", "~{v4},~{v8},~{v12},~{v16},~{v20},~{v24},~{v28},~{v32}" () + call void asm sideeffect "", "~{v36},~{v40},~{v44},~{v48},~{v52},~{v56},~{v60},~{v64}" () + call void asm sideeffect "", "~{v68},~{v72},~{v76},~{v80},~{v84},~{v88},~{v92},~{v96}" () + call void asm sideeffect "", "~{v100},~{v104},~{v108},~{v112},~{v116},~{v120},~{v124},~{v128}" () + call void asm sideeffect "", "~{v132},~{v136},~{v140},~{v144},~{v148},~{v152},~{v156},~{v160}" () + call void asm sideeffect "", "~{v164},~{v168},~{v172},~{v176},~{v180},~{v184},~{v188},~{v192}" () + call void asm sideeffect "", "~{v196},~{v200},~{v204},~{v208},~{v212},~{v216},~{v220},~{v224}" () %outptr = getelementptr <1280 x i32>, <1280 x i32> addrspace(1)* %out, i32 %tid store <1280 x i32> %a, <1280 x i32> addrspace(1)* %outptr Index: test/CodeGen/AMDGPU/undefined-subreg-liverange.ll =================================================================== --- test/CodeGen/AMDGPU/undefined-subreg-liverange.ll +++ test/CodeGen/AMDGPU/undefined-subreg-liverange.ll @@ -73,14 +73,14 @@ ; CHECK: buffer_store_dwordx4 v{{\[}}[[OUTPUT_LO]]:[[OUTPUT_HI]]{{\]}} define amdgpu_kernel void @partially_undef_copy() #0 { - %tmp0 = call i32 asm sideeffect "v_mov_b32_e32 v5, 5", "={VGPR5}"() - %tmp1 = call i32 asm sideeffect "v_mov_b32_e32 v6, 6", "={VGPR6}"() + %tmp0 = call i32 asm sideeffect "v_mov_b32_e32 v5, 5", "={v5}"() + %tmp1 = call i32 asm sideeffect "v_mov_b32_e32 v6, 6", "={v6}"() %partially.undef.0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0 %partially.undef.1 = insertelement <4 x i32> %partially.undef.0, i32 %tmp1, i32 0 store volatile <4 x i32> %partially.undef.1, <4 x i32> addrspace(1)* undef, align 16 - tail call void asm sideeffect "v_nop", "v={VGPR5_VGPR6_VGPR7_VGPR8}"(<4 x i32> %partially.undef.0) + tail call void asm sideeffect "v_nop", "v={v[5:8]}"(<4 x i32> %partially.undef.0) ret void } Index: test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir =================================================================== --- /dev/null +++ test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir @@ -0,0 +1,149 @@ +# RUN: llc -O0 -mtriple arm-- -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +--- | + define void @test_mla() #0 { ret void } + define void @test_mla_v5() #1 { ret void } + + define void @test_mls() #2 { ret void } + define void @test_no_mls() { ret void } + + attributes #0 = { "target-features"="+v6" } + attributes #1 = { "target-features"="-v6" } + attributes #2 = { "target-features"="+v6t2" } +... 
+--- +name: test_mla +# CHECK-LABEL: name: test_mla +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } +body: | + bb.0: + liveins: %r0, %r1, %r2 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = COPY %r2 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2 + + %3(s32) = G_MUL %0, %1 + %4(s32) = G_ADD %3, %2 + ; CHECK: [[VREGR:%[0-9]+]] = MLA [[VREGX]], [[VREGY]], [[VREGZ]], 14, _, _ + + %r0 = COPY %4(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_mla_v5 +# CHECK-LABEL: name: test_mla_v5 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } +body: | + bb.0: + liveins: %r0, %r1, %r2 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = COPY %r2 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2 + + %3(s32) = G_MUL %0, %1 + %4(s32) = G_ADD %3, %2 + ; CHECK: [[VREGR:%[0-9]+]] = MLAv5 [[VREGX]], [[VREGY]], [[VREGZ]], 14, _, _ + + %r0 = COPY %4(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_mls +# CHECK-LABEL: name: test_mls +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } +body: | + bb.0: + liveins: %r0, %r1, %r2 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = COPY %r2 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2 + + %3(s32) = G_MUL %0, %1 + %4(s32) = G_SUB %2, %3 + ; CHECK: [[VREGR:%[0-9]+]] = MLS [[VREGX]], [[VREGY]], [[VREGZ]], 14, _ + + %r0 = COPY %4(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_no_mls +# CHECK-LABEL: name: test_no_mls +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } +body: | + bb.0: + liveins: %r0, %r1, %r2 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = COPY %r2 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2 + + %3(s32) = G_MUL %0, %1 + %4(s32) = G_SUB %2, %3 + ; CHECK: [[VREGM:%[0-9]+]] = MULv5 [[VREGX]], [[VREGY]], 14, _, _ + ; CHECK: [[VREGR:%[0-9]+]] = SUBrr [[VREGZ]], [[VREGM]], 14, _, _ + + %r0 = COPY %4(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... Index: test/CodeGen/Hexagon/mux-undef.ll =================================================================== --- /dev/null +++ test/CodeGen/Hexagon/mux-undef.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s +; +; Make sure this test compiles successfully. 
+; CHECK: jumpr r31 + +target triple = "hexagon--elf" + +; Function Attrs: nounwind +define i32 @fred() #0 { +b0: + call void @foo() #0 + br label %b1 + +b1: ; preds = %b0 + br i1 undef, label %b2, label %b3 + +b2: ; preds = %b1 + br label %b3 + +b3: ; preds = %b2, %b1 + %v4 = phi i32 [ 1, %b1 ], [ 2, %b2 ] + ret i32 %v4 +} + +declare void @foo() #0 + +attributes #0 = { nounwind "target-cpu"="hexagonv60" } Index: test/CodeGen/PowerPC/BoolRetToIntTest-2.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/BoolRetToIntTest-2.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=powerpc64le-linux-gnu -mcpu=pwr8 < %s | FileCheck %s + +; https://bugs.llvm.org/show_bug.cgi?id=32442 +; Don't generate zero extension for the return value. +; CHECK-NOT: clrldi + +define zeroext i1 @foo(i32 signext %i, i32* %p) { +entry: + %cmp = icmp eq i32 %i, 0 + br i1 %cmp, label %return, label %if.end + +if.end: + store i32 %i, i32* %p, align 4 + br label %return + +return: + %retval = phi i1 [ true, %if.end ], [ false, %entry ] + ret i1 %retval +} Index: test/CodeGen/PowerPC/BoolRetToIntTest.ll =================================================================== --- test/CodeGen/PowerPC/BoolRetToIntTest.ll +++ test/CodeGen/PowerPC/BoolRetToIntTest.ll @@ -31,14 +31,14 @@ br i1 %call, label %cleanup.loopexit, label %for.cond cleanup.loopexit: ; preds = %for.body, %for.cond -; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] +; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ] %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] br label %cleanup cleanup: ; preds = %cleanup.loopexit, %entry -; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] +; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ] -; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1 ; CHECK: ret i1 [[REG]] ret i1 %cleanup.dest.slot.0 } @@ -78,14 +78,14 @@ br i1 %call, label %cleanup.loopexit, label %for.cond cleanup.loopexit: ; preds = %for.body, %for.cond -; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] +; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ] %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] br label %cleanup cleanup: ; preds = %cleanup.loopexit, %entry -; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] +; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ] -; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1 ; CHECK: call void %cont(i1 [[REG]] tail call void %cont(i1 %cleanup.dest.slot.0) ret void @@ -112,17 +112,17 @@ br i1 %call, label %cleanup.loopexit, label %for.cond cleanup.loopexit: ; preds = %for.body, %for.cond -; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] +; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ] %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] br label %cleanup cleanup: ; preds = %cleanup.loopexit, %entry -; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] +; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ] -; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; 
CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1 ; CHECK: call void %cont(i1 [[REG]] tail call void %cont(i1 %cleanup.dest.slot.0) -; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1 ; CHECK: ret i1 [[REG]] ret i1 %cleanup.dest.slot.0 } @@ -136,7 +136,7 @@ br label %cleanup cleanup: -; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1 ; CHECK: ret i1 [[REG]] %result = phi i1 [ false, %foo ], [ %operand, %entry ] ret i1 %result @@ -186,7 +186,7 @@ ; CHECK-LABEL: cleanup cleanup: -; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1 ; CHECK: ret i1 [[REG]] %result = phi i1 [ %bar, %foo], [ %operand, %entry ] ret i1 %result @@ -198,8 +198,8 @@ define zeroext i1 @call_test() { ; CHECK: [[REG:%.+]] = call i1 %result = call i1 @return_i1() -; CHECK: [[REG:%.+]] = zext i1 {{%.+}} to i32 -; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: [[REG:%.+]] = zext i1 {{%.+}} to i64 +; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1 ; CHECK: ret i1 [[REG]] ret i1 %result } Index: test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll =================================================================== --- test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll +++ test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll @@ -17,13 +17,13 @@ ; Check 4 bytes - requires 1 load for each param. define signext i32 @zeroEqualityTest02(i8* %x, i8* %y) { ; CHECK-LABEL: zeroEqualityTest02: -; CHECK: # BB#0: # %loadbb +; CHECK: # BB#0: ; CHECK-NEXT: lwz 3, 0(3) ; CHECK-NEXT: lwz 4, 0(4) -; CHECK-NEXT: li 5, 1 -; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: isel 3, 0, 5, 2 -; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: xor 3, 3, 4 +; CHECK-NEXT: cntlzw 3, 3 +; CHECK-NEXT: srwi 3, 3, 5 +; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 4) %not.cmp = icmp ne i32 %call, 0 @@ -196,3 +196,27 @@ ret i32 %cond } +define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) { +; CHECK-LABEL: length2_eq_nobuiltin_attr: +; CHECK: # BB#0: +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -32(1) +; CHECK-NEXT: .Lcfi0: +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .Lcfi1: +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: li 5, 2 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: nop +; CHECK-NEXT: cntlzw 3, 3 +; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31 +; CHECK-NEXT: addi 1, 1, 32 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr + %m = tail call signext i32 @memcmp(i8* %X, i8* %Y, i64 2) nobuiltin + %c = icmp eq i32 %m, 0 + ret i1 %c +} + Index: test/CodeGen/PowerPC/vec_int_ext.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/vec_int_ext.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s | FileCheck %s -check-prefix=PWR9 +target triple = "powerpc64le-unknown-linux-gnu" + +define <4 x i32> @vextsb2w(<16 x i8> %a) { +; PWR9-LABEL: vextsb2w: +; PWR9: # BB#0: # %entry +; PWR9-NEXT: vextsb2w 2, 2 +; PWR9-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 0 + %conv = sext i8 %vecext to i32 + %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 + %vecext1 = extractelement <16 x i8> %a, i32 4 + %conv2 = sext i8 %vecext1 to i32 + %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1 + %vecext4 = extractelement <16 x i8> %a, i32 8 + %conv5 = sext i8 
%vecext4 to i32 + %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2 + %vecext7 = extractelement <16 x i8> %a, i32 12 + %conv8 = sext i8 %vecext7 to i32 + %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 + ret <4 x i32> %vecinit9 +} + +define <2 x i64> @vextsb2d(<16 x i8> %a) { +; PWR9-LABEL: vextsb2d: +; PWR9: # BB#0: # %entry +; PWR9-NEXT: vextsb2d 2, 2 +; PWR9-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 0 + %conv = sext i8 %vecext to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %vecext1 = extractelement <16 x i8> %a, i32 8 + %conv2 = sext i8 %vecext1 to i64 + %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 + ret <2 x i64> %vecinit3 +} + +define <4 x i32> @vextsh2w(<8 x i16> %a) { +; PWR9-LABEL: vextsh2w: +; PWR9: # BB#0: # %entry +; PWR9-NEXT: vextsh2w 2, 2 +; PWR9-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 0 + %conv = sext i16 %vecext to i32 + %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 + %vecext1 = extractelement <8 x i16> %a, i32 2 + %conv2 = sext i16 %vecext1 to i32 + %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1 + %vecext4 = extractelement <8 x i16> %a, i32 4 + %conv5 = sext i16 %vecext4 to i32 + %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2 + %vecext7 = extractelement <8 x i16> %a, i32 6 + %conv8 = sext i16 %vecext7 to i32 + %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 + ret <4 x i32> %vecinit9 +} + +define <2 x i64> @vextsh2d(<8 x i16> %a) { +; PWR9-LABEL: vextsh2d: +; PWR9: # BB#0: # %entry +; PWR9-NEXT: vextsh2d 2, 2 +; PWR9-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 0 + %conv = sext i16 %vecext to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %vecext1 = extractelement <8 x i16> %a, i32 4 + %conv2 = sext i16 %vecext1 to i64 + %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 + ret <2 x i64> %vecinit3 +} + +define <2 x i64> @vextsw2d(<4 x i32> %a) { +; PWR9-LABEL: vextsw2d: +; PWR9: # BB#0: # %entry +; PWR9-NEXT: vextsw2d 2, 2 +; PWR9-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 0 + %conv = sext i32 %vecext to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %vecext1 = extractelement <4 x i32> %a, i32 2 + %conv2 = sext i32 %vecext1 to i64 + %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 + ret <2 x i64> %vecinit3 +} Index: test/CodeGen/WebAssembly/offset-fastisel.ll =================================================================== --- /dev/null +++ test/CodeGen/WebAssembly/offset-fastisel.ll @@ -0,0 +1,100 @@ +; RUN: llc < %s -asm-verbose=false -disable-wasm-explicit-locals -fast-isel | FileCheck %s + +; TODO: Merge this with offset.ll when fast-isel matches better. 
+ +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown-elf" + +; CHECK-LABEL: store_i8_with_variable_gep_offset: +; CHECK: i32.add $push[[L0:[0-9]+]]=, $0, $1{{$}} +; CHECK: i32.const $push[[L1:[0-9]+]]=, 0{{$}} +; CHECK: i32.store8 0($pop[[L0]]), $pop[[L1]]{{$}} +define void @store_i8_with_variable_gep_offset(i8* %p, i32 %idx) { + %s = getelementptr inbounds i8, i8* %p, i32 %idx + store i8 0, i8* %s + ret void +} + +; CHECK-LABEL: store_i8_with_array_alloca_gep: +; CHECK: i32.const $push[[L0:[0-9]+]]=, 0{{$}} +; CHECK: i32.load $push[[L1:[0-9]+]]=, __stack_pointer($pop[[L0]]){{$}} +; CHECK: i32.const $push[[L2:[0-9]+]]=, 32{{$}} +; CHECK: i32.sub $push{{[0-9]+}}=, $pop[[L1]], $pop[[L2]]{{$}} +; CHECK: i32.add $push[[L4:[0-9]+]]=, $pop{{[0-9]+}}, $0{{$}} +; CHECK: i32.const $push[[L5:[0-9]+]]=, 0{{$}} +; CHECK: i32.store8 0($pop[[L4]]), $pop[[L5]]{{$}} +define hidden void @store_i8_with_array_alloca_gep(i32 %idx) { + %A = alloca [30 x i8], align 16 + %s = getelementptr inbounds [30 x i8], [30 x i8]* %A, i32 0, i32 %idx + store i8 0, i8* %s, align 1 + ret void +} + +; CHECK-LABEL: store_i32_with_unfolded_gep_offset: +; CHECK: i32.const $push[[L0:[0-9]+]]=, 24{{$}} +; CHECK: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} +; CHECK: i32.const $push[[L2:[0-9]+]]=, 0{{$}} +; CHECK: i32.store 0($pop[[L1]]), $pop[[L2]]{{$}} +define void @store_i32_with_unfolded_gep_offset(i32* %p) { + %s = getelementptr i32, i32* %p, i32 6 + store i32 0, i32* %s + ret void +} + +; CHECK-LABEL: store_i32_with_folded_gep_offset: +; CHECK: i32.store 24($0), $pop{{[0-9]+$}} +define void @store_i32_with_folded_gep_offset(i32* %p) { + %s = getelementptr inbounds i32, i32* %p, i32 6 + store i32 0, i32* %s + ret void +} + +; CHECK-LABEL: load_i32_with_folded_gep_offset: +; CHECK: i32.load $push{{[0-9]+}}=, 24($0){{$}} +define i32 @load_i32_with_folded_gep_offset(i32* %p) { + %s = getelementptr inbounds i32, i32* %p, i32 6 + %t = load i32, i32* %s + ret i32 %t +} + +; CHECK-LABEL: store_i64_with_unfolded_gep_offset: +; CHECK: i32.const $push[[L0:[0-9]+]]=, 24{{$}} +; CHECK: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} +; CHECK: i64.const $push[[L2:[0-9]+]]=, 0{{$}} +; CHECK: i64.store 0($pop[[L1]]), $pop[[L2]]{{$}} +define void @store_i64_with_unfolded_gep_offset(i64* %p) { + %s = getelementptr i64, i64* %p, i32 3 + store i64 0, i64* %s + ret void +} + +; CHECK-LABEL: store_i8_with_folded_gep_offset: +; CHECK: i32.store8 24($0), $pop{{[0-9]+$}} +define void @store_i8_with_folded_gep_offset(i8* %p) { + %s = getelementptr inbounds i8, i8* %p, i32 24 + store i8 0, i8* %s + ret void +} + +; CHECK-LABEL: load_i8_u_with_folded_offset: +; CHECK: i32.load8_u $push{{[0-9]+}}=, 24($0){{$}} +define i32 @load_i8_u_with_folded_offset(i8* %p) { + %q = ptrtoint i8* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i8* + %t = load i8, i8* %s + %u = zext i8 %t to i32 + ret i32 %u +} + +; TODO: this should be load8_s, need to fold sign-/zero-extend in fast-isel +; CHECK-LABEL: load_i8_s_with_folded_offset: +; CHECK: i32.load8_u $push{{[0-9]+}}=, 24($0){{$}} +define i32 @load_i8_s_with_folded_offset(i8* %p) { + %q = ptrtoint i8* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i8* + %t = load i8, i8* %s + %u = sext i8 %t to i32 + ret i32 %u +} Index: test/CodeGen/X86/avx-schedule.ll =================================================================== --- test/CodeGen/X86/avx-schedule.ll +++ test/CodeGen/X86/avx-schedule.ll @@ -910,14 +910,14 @@ ; ; BTVER2-LABEL: 
test_haddpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BTVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_haddpd: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; ZNVER1-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; ZNVER1-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) %2 = load <4 x double>, <4 x double> *%a2, align 32 @@ -941,14 +941,14 @@ ; ; BTVER2-LABEL: test_haddps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BTVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_haddps: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; ZNVER1-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; ZNVER1-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) %2 = load <8 x float>, <8 x float> *%a2, align 32 @@ -972,14 +972,14 @@ ; ; BTVER2-LABEL: test_hsubpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BTVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_hsubpd: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; ZNVER1-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; ZNVER1-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) %2 = load <4 x double>, <4 x double> *%a2, align 32 @@ -1003,14 +1003,14 @@ ; ; BTVER2-LABEL: test_hsubps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BTVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_hsubps: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; ZNVER1-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; ZNVER1-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) %2 = load <8 x float>, <8 x float> *%a2, align 32 Index: test/CodeGen/X86/memcmp.ll =================================================================== --- test/CodeGen/X86/memcmp.ll +++ test/CodeGen/X86/memcmp.ll @@ -10,9 +10,28 @@ declare i32 @memcmp(i8*, i8*, i64) -define i1 @length2(i8* %X, i8* %Y, i32* nocapture 
%P) nounwind { +define i32 @length2(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length2: ; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $2 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length2: +; X64: # BB#0: +; X64-NEXT: movl $2, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind + ret i32 %m +} + +define i1 @length2_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length2_eq: +; X32: # BB#0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movzwl (%ecx), %ecx @@ -20,7 +39,7 @@ ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; X64-LABEL: length2: +; X64-LABEL: length2_eq: ; X64: # BB#0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: cmpw (%rsi), %ax @@ -31,8 +50,8 @@ ret i1 %c } -define i1 @length2_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length2_const: +define i1 @length2_eq_const(i8* %X) nounwind { +; X32-LABEL: length2_eq_const: ; X32: # BB#0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movzwl (%eax), %eax @@ -40,7 +59,7 @@ ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; X64-LABEL: length2_const: +; X64-LABEL: length2_eq_const: ; X64: # BB#0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: cmpl $12849, %eax # imm = 0x3231 @@ -51,8 +70,8 @@ ret i1 %c } -define i1 @length2_nobuiltin_attr(i8* %X, i8* %Y, i32* nocapture %P) nounwind { -; X32-LABEL: length2_nobuiltin_attr: +define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length2_eq_nobuiltin_attr: ; X32: # BB#0: ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $2 @@ -64,7 +83,7 @@ ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; X64-LABEL: length2_nobuiltin_attr: +; X64-LABEL: length2_eq_nobuiltin_attr: ; X64: # BB#0: ; X64-NEXT: pushq %rax ; X64-NEXT: movl $2, %edx @@ -78,9 +97,74 @@ ret i1 %c } -define i1 @length4(i8* %X, i8* %Y, i32* nocapture %P) nounwind { +define i32 @length3(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length3: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $3 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length3: +; X64: # BB#0: +; X64-NEXT: movl $3, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind + ret i32 %m +} + +define i1 @length3_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length3_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $3 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: testl %eax, %eax +; X32-NEXT: setne %al +; X32-NEXT: retl +; +; X64-LABEL: length3_eq: +; X64: # BB#0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $3, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length4(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length4: ; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $4 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length4: +; X64: # BB#0: +; X64-NEXT: movl $4, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind + ret i32 %m +} + +define i1 
@length4_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length4_eq: +; X32: # BB#0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%ecx), %ecx @@ -88,7 +172,7 @@ ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; X64-LABEL: length4: +; X64-LABEL: length4_eq: ; X64: # BB#0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: cmpl (%rsi), %eax @@ -99,15 +183,15 @@ ret i1 %c } -define i1 @length4_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length4_const: +define i1 @length4_eq_const(i8* %X) nounwind { +; X32-LABEL: length4_eq_const: ; X32: # BB#0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231 ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; X64-LABEL: length4_const: +; X64-LABEL: length4_eq_const: ; X64: # BB#0: ; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231 ; X64-NEXT: sete %al @@ -117,7 +201,53 @@ ret i1 %c } -define i1 @length8(i8* %X, i8* %Y, i32* nocapture %P) nounwind { +define i32 @length5(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length5: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $5 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length5: +; X64: # BB#0: +; X64-NEXT: movl $5, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind + ret i32 %m +} + +define i1 @length5_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length5_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $5 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: testl %eax, %eax +; X32-NEXT: setne %al +; X32-NEXT: retl +; +; X64-LABEL: length5_eq: +; X64: # BB#0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $5, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length8(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length8: ; X32: # BB#0: ; X32-NEXT: pushl $0 @@ -126,11 +256,30 @@ ; X32-NEXT: pushl {{[0-9]+}}(%esp) ; X32-NEXT: calll memcmp ; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length8: +; X64: # BB#0: +; X64-NEXT: movl $8, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind + ret i32 %m +} + +define i1 @length8_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length8_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $8 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp ; X32-NEXT: testl %eax, %eax ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; X64-LABEL: length8: +; X64-LABEL: length8_eq: ; X64: # BB#0: ; X64-NEXT: movq (%rdi), %rax ; X64-NEXT: cmpq (%rsi), %rax @@ -141,8 +290,8 @@ ret i1 %c } -define i1 @length8_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length8_const: +define i1 @length8_eq_const(i8* %X) nounwind { +; X32-LABEL: length8_eq_const: ; X32: # BB#0: ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $8 @@ -154,7 +303,7 @@ ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; X64-LABEL: length8_const: +; X64-LABEL: length8_eq_const: ; X64: # BB#0: ; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130 ; X64-NEXT: cmpq %rax, (%rdi) @@ -165,7 +314,55 @@ ret i1 %c } -define i1 @length16(i8* %x, i8* %y) nounwind { +define 
i1 @length12_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length12_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $12 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: testl %eax, %eax +; X32-NEXT: setne %al +; X32-NEXT: retl +; +; X64-LABEL: length12_eq: +; X64: # BB#0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $12, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length12(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length12: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $12 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length12: +; X64: # BB#0: +; X64-NEXT: movl $12, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind + ret i32 %m +} + +; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 + +define i32 @length16(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length16: ; X32: # BB#0: ; X32-NEXT: pushl $0 @@ -174,11 +371,30 @@ ; X32-NEXT: pushl {{[0-9]+}}(%esp) ; X32-NEXT: calll memcmp ; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length16: +; X64: # BB#0: +; X64-NEXT: movl $16, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind + ret i32 %m +} + +define i1 @length16_eq(i8* %x, i8* %y) nounwind { +; X32-LABEL: length16_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $16 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp ; X32-NEXT: testl %eax, %eax ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; SSE2-LABEL: length16: +; SSE2-LABEL: length16_eq: ; SSE2: # BB#0: ; SSE2-NEXT: movdqu (%rsi), %xmm0 ; SSE2-NEXT: movdqu (%rdi), %xmm1 @@ -188,7 +404,7 @@ ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; -; AVX2-LABEL: length16: +; AVX2-LABEL: length16_eq: ; AVX2: # BB#0: ; AVX2-NEXT: vmovdqu (%rdi), %xmm0 ; AVX2-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0 @@ -201,8 +417,8 @@ ret i1 %cmp } -define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length16_const: +define i1 @length16_eq_const(i8* %X) nounwind { +; X32-LABEL: length16_eq_const: ; X32: # BB#0: ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $16 @@ -214,7 +430,7 @@ ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; SSE2-LABEL: length16_const: +; SSE2-LABEL: length16_eq_const: ; SSE2: # BB#0: ; SSE2-NEXT: movdqu (%rdi), %xmm0 ; SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0 @@ -223,7 +439,7 @@ ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; -; AVX2-LABEL: length16_const: +; AVX2-LABEL: length16_eq_const: ; AVX2: # BB#0: ; AVX2-NEXT: vmovdqu (%rdi), %xmm0 ; AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0 @@ -236,7 +452,7 @@ ret i1 %c } -define i1 @length32(i8* %x, i8* %y) nounwind { +define i32 @length32(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length32: ; X32: # BB#0: ; X32-NEXT: pushl $0 @@ -245,11 +461,32 @@ ; X32-NEXT: pushl {{[0-9]+}}(%esp) ; X32-NEXT: calll memcmp ; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length32: +; X64: # BB#0: +; X64-NEXT: movl $32, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind + ret i32 %m +} + +; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 + 
+define i1 @length32_eq(i8* %x, i8* %y) nounwind { +; X32-LABEL: length32_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $32 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp ; X32-NEXT: testl %eax, %eax ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; SSE2-LABEL: length32: +; SSE2-LABEL: length32_eq: ; SSE2: # BB#0: ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movl $32, %edx @@ -259,7 +496,7 @@ ; SSE2-NEXT: popq %rcx ; SSE2-NEXT: retq ; -; AVX2-LABEL: length32: +; AVX2-LABEL: length32_eq: ; AVX2: # BB#0: ; AVX2-NEXT: vmovdqu (%rdi), %ymm0 ; AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0 @@ -273,8 +510,8 @@ ret i1 %cmp } -define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length32_const: +define i1 @length32_eq_const(i8* %X) nounwind { +; X32-LABEL: length32_eq_const: ; X32: # BB#0: ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $32 @@ -286,7 +523,7 @@ ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; SSE2-LABEL: length32_const: +; SSE2-LABEL: length32_eq_const: ; SSE2: # BB#0: ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movl $.L.str, %esi @@ -297,7 +534,7 @@ ; SSE2-NEXT: popq %rcx ; SSE2-NEXT: retq ; -; AVX2-LABEL: length32_const: +; AVX2-LABEL: length32_eq_const: ; AVX2: # BB#0: ; AVX2-NEXT: vmovdqu (%rdi), %ymm0 ; AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0 @@ -311,7 +548,7 @@ ret i1 %c } -define i1 @length64(i8* %x, i8* %y) nounwind { +define i32 @length64(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length64: ; X32: # BB#0: ; X32-NEXT: pushl $0 @@ -320,11 +557,30 @@ ; X32-NEXT: pushl {{[0-9]+}}(%esp) ; X32-NEXT: calll memcmp ; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length64: +; X64: # BB#0: +; X64-NEXT: movl $64, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind + ret i32 %m +} + +define i1 @length64_eq(i8* %x, i8* %y) nounwind { +; X32-LABEL: length64_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $64 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp ; X32-NEXT: testl %eax, %eax ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; X64-LABEL: length64: +; X64-LABEL: length64_eq: ; X64: # BB#0: ; X64-NEXT: pushq %rax ; X64-NEXT: movl $64, %edx @@ -338,8 +594,8 @@ ret i1 %cmp } -define i1 @length64_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length64_const: +define i1 @length64_eq_const(i8* %X) nounwind { +; X32-LABEL: length64_eq_const: ; X32: # BB#0: ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $64 @@ -351,7 +607,7 @@ ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; X64-LABEL: length64_const: +; X64-LABEL: length64_eq_const: ; X64: # BB#0: ; X64-NEXT: pushq %rax ; X64-NEXT: movl $.L.str, %esi Index: test/CodeGen/X86/mul-constant-i16.ll =================================================================== --- test/CodeGen/X86/mul-constant-i16.ll +++ test/CodeGen/X86/mul-constant-i16.ll @@ -188,13 +188,16 @@ ; X86-LABEL: test_mul_by_11: ; X86: # BB#0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $11, %eax, %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,2), %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_11: ; X64: # BB#0: -; X64-NEXT: imull $11, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,4), %eax +; X64-NEXT: leal (%rdi,%rax,2), %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 11 @@ -225,13 +228,16 @@ ; X86-LABEL: 
test_mul_by_13: ; X86: # BB#0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $13, %eax, %eax +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_13: ; X64: # BB#0: -; X64-NEXT: imull $13, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,2), %eax +; X64-NEXT: leal (%rdi,%rax,4), %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 13 @@ -241,14 +247,19 @@ define i16 @test_mul_by_14(i16 %x) { ; X86-LABEL: test_mul_by_14: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $14, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %eax +; X86-NEXT: leal (%ecx,%eax,4), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_14: ; X64: # BB#0: -; X64-NEXT: imull $14, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,2), %eax +; X64-NEXT: leal (%rdi,%rax,4), %eax +; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 14 @@ -337,14 +348,19 @@ define i16 @test_mul_by_19(i16 %x) { ; X86-LABEL: test_mul_by_19: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $19, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,4), %eax +; X86-NEXT: shll $2, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_19: ; X64: # BB#0: -; X64-NEXT: imull $19, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,4), %eax +; X64-NEXT: shll $2, %eax +; X64-NEXT: subl %edi, %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 19 @@ -375,13 +391,16 @@ ; X86-LABEL: test_mul_by_21: ; X86: # BB#0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $21, %eax, %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_21: ; X64: # BB#0: -; X64-NEXT: imull $21, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,4), %eax +; X64-NEXT: leal (%rdi,%rax,4), %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 21 @@ -391,14 +410,19 @@ define i16 @test_mul_by_22(i16 %x) { ; X86-LABEL: test_mul_by_22: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $22, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,4), %eax +; X86-NEXT: leal (%ecx,%eax,4), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_22: ; X64: # BB#0: -; X64-NEXT: imull $22, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,4), %eax +; X64-NEXT: leal (%rdi,%rax,4), %eax +; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 22 @@ -408,14 +432,19 @@ define i16 @test_mul_by_23(i16 %x) { ; X86-LABEL: test_mul_by_23: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $23, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %eax +; X86-NEXT: shll $3, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_23: ; X64: # BB#0: -; X64-NEXT: imull $23, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal 
(%rdi,%rdi,2), %eax +; X64-NEXT: shll $3, %eax +; X64-NEXT: subl %edi, %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 23 @@ -465,14 +494,19 @@ define i16 @test_mul_by_26(i16 %x) { ; X86-LABEL: test_mul_by_26: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $26, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_26: ; X64: # BB#0: -; X64-NEXT: imull $26, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,8), %eax +; X64-NEXT: leal (%rax,%rax,2), %eax +; X64-NEXT: subl %edi, %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 26 @@ -502,14 +536,19 @@ define i16 @test_mul_by_28(i16 %x) { ; X86-LABEL: test_mul_by_28: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $28, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_28: ; X64: # BB#0: -; X64-NEXT: imull $28, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,8), %eax +; X64-NEXT: leal (%rax,%rax,2), %eax +; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 28 @@ -519,14 +558,21 @@ define i16 @test_mul_by_29(i16 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $29, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_29: ; X64: # BB#0: -; X64-NEXT: imull $29, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,8), %eax +; X64-NEXT: leal (%rax,%rax,2), %eax +; X64-NEXT: addl %edi, %eax +; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 29 @@ -536,14 +582,20 @@ define i16 @test_mul_by_30(i16 %x) { ; X86-LABEL: test_mul_by_30: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $30, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shll $5, %eax +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_30: ; X64: # BB#0: -; X64-NEXT: imull $30, %edi, %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $5, %eax +; X64-NEXT: subl %edi, %eax +; X64-NEXT: subl %edi, %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 30 @@ -587,3 +639,30 @@ %mul = mul nsw i16 %x, 32 ret i16 %mul } + +; (x*9+42)*(x*5+2) +define i16 @test_mul_spec(i16 %x) nounwind { +; X86-LABEL: test_mul_spec: +; X86: # BB#0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal 42(%eax,%eax,8), %ecx +; X86-NEXT: leal 2(%eax,%eax,4), %eax +; X86-NEXT: imull %ecx, %eax +; X86-NEXT: # kill: %AX %AX %EAX +; X86-NEXT: retl +; +; X64-LABEL: test_mul_spec: +; X64: # BB#0: +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal 42(%rdi,%rdi,8), %ecx +; X64-NEXT: leal 2(%rdi,%rdi,4), %eax +; X64-NEXT: imull %ecx, %eax +; X64-NEXT: # kill: %AX %AX %EAX +; X64-NEXT: retq + %mul = mul nsw i16 %x, 9 + 
%add = add nsw i16 %mul, 42 + %mul2 = mul nsw i16 %x, 5 + %add2 = add nsw i16 %mul2, 2 + %mul3 = mul nsw i16 %add, %add2 + ret i16 %mul3 +} Index: test/CodeGen/X86/mul-constant-i32.ll =================================================================== --- test/CodeGen/X86/mul-constant-i32.ll +++ test/CodeGen/X86/mul-constant-i32.ll @@ -1,6 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW +; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG +; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT define i32 @test_mul_by_1(i32 %x) { ; X86-LABEL: test_mul_by_1: @@ -8,10 +14,40 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_1: -; X64: # BB#0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_1: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_1: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_1: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_1: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_1: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_1: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_1: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 1 ret i32 %mul } @@ -23,11 +59,47 @@ ; X86-NEXT: addl %eax, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_2: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (%rdi,%rdi), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_2: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_2: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_2: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: addl %eax, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: 
test_mul_by_2: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; HSW-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_2: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; JAG-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_2: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (%rdi,%rdi), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_2: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; SLM-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 2 ret i32 %mul } @@ -38,11 +110,46 @@ ; X86-NEXT: imull $3, {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_3: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (%rdi,%rdi,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_3: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_3: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_3: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $3, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_3: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_3: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; JAG-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_3: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_3: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; SLM-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 3 ret i32 %mul } @@ -54,11 +161,47 @@ ; X86-NEXT: shll $2, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_4: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (,%rdi,4), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_4: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_4: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_4: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: shll $2, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_4: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; HSW-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_4: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; JAG-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq 
# sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_4: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_4: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; SLM-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 4 ret i32 %mul } @@ -69,11 +212,46 @@ ; X86-NEXT: imull $5, {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_5: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (%rdi,%rdi,4), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_5: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_5: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_5: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $5, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_5: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_5: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; JAG-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_5: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_5: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; SLM-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 5 ret i32 %mul } @@ -86,12 +264,46 @@ ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_6: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: addl %edi, %edi -; X64-NEXT: leal (%rdi,%rdi,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_6: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_6: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_6: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $6, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_6: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_6: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_6: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_6: +; SLM-NOOPT: # BB#0: +; 
SLM-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 6 ret i32 %mul } @@ -104,12 +316,46 @@ ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_7: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (,%rdi,8), %eax -; X64-NEXT: subl %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_7: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_7: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_7: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $7, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_7: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_7: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_7: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00] +; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_7: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 7 ret i32 %mul } @@ -121,11 +367,47 @@ ; X86-NEXT: shll $3, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_8: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (,%rdi,8), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_8: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_8: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_8: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: shll $3, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_8: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; HSW-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_8: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; JAG-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_8: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_8: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; SLM-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 8 ret i32 %mul } @@ -136,11 +418,46 @@ ; X86-NEXT: imull $9, {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_9: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (%rdi,%rdi,8), %eax -; X64-NEXT: 
retq +; X64-HSW-LABEL: test_mul_by_9: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_9: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_9: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $9, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_9: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_9: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; JAG-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_9: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_9: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; SLM-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 9 ret i32 %mul } @@ -153,12 +470,46 @@ ; X86-NEXT: leal (%eax,%eax,4), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_10: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: addl %edi, %edi -; X64-NEXT: leal (%rdi,%rdi,4), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_10: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_10: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_10: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $10, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_10: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_10: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_10: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_10: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 10 ret i32 %mul } @@ -166,13 +517,49 @@ define i32 @test_mul_by_11(i32 %x) { ; X86-LABEL: test_mul_by_11: ; X86: # BB#0: -; X86-NEXT: imull $11, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_11: -; X64: # BB#0: -; X64-NEXT: imull $11, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_11: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,4), 
%eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_11: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_11: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $11, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_11: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_11: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_11: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $11, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_11: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 11 ret i32 %mul } @@ -185,12 +572,46 @@ ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_12: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: shll $2, %edi -; X64-NEXT: leal (%rdi,%rdi,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_12: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_12: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_12: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $12, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_12: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_12: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_12: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_12: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 12 ret i32 %mul } @@ -198,13 +619,49 @@ define i32 @test_mul_by_13(i32 %x) { ; X86-LABEL: test_mul_by_13: ; X86: # BB#0: -; X86-NEXT: imull $13, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_13: -; X64: # BB#0: -; X64-NEXT: imull $13, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_13: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_13: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: 
# kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_13: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $13, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_13: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_13: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_13: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $13, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_13: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 13 ret i32 %mul } @@ -212,13 +669,52 @@ define i32 @test_mul_by_14(i32 %x) { ; X86-LABEL: test_mul_by_14: ; X86: # BB#0: -; X86-NEXT: imull $14, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %eax +; X86-NEXT: leal (%ecx,%eax,4), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_14: -; X64: # BB#0: -; X64-NEXT: imull $14, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_14: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_14: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_14: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $14, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_14: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_14: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_14: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $14, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_14: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 14 ret i32 %mul } @@ -231,12 +727,46 @@ ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_15: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (%rdi,%rdi,4), %eax -; X64-NEXT: leal (%rax,%rax,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_15: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_15: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax 
# sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_15: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $15, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_15: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_15: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_15: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rax,%rax,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_15: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 15 ret i32 %mul } @@ -248,11 +778,47 @@ ; X86-NEXT: shll $4, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_16: -; X64: # BB#0: -; X64-NEXT: shll $4, %edi -; X64-NEXT: movl %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_16: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shll $4, %edi # sched: [1:0.50] +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_16: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shll $4, %edi # sched: [1:0.50] +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_16: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: shll $4, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_16: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50] +; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_16: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50] +; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_16: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shll $4, %edi # sched: [1:1.00] +; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_16: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: shll $4, %edi # sched: [1:1.00] +; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 16 ret i32 %mul } @@ -266,13 +832,49 @@ ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_17: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: movl %edi, %eax -; X64-NEXT: shll $4, %eax -; X64-NEXT: leal (%rax,%rdi), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_17: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_17: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: shll $4, %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_17: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $17, {{[0-9]+}}(%esp), %eax +; 
X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_17: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_17: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_17: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: shll $4, %eax # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rax,%rdi), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_17: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 17 ret i32 %mul } @@ -285,12 +887,46 @@ ; X86-NEXT: leal (%eax,%eax,8), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_18: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: addl %edi, %edi -; X64-NEXT: leal (%rdi,%rdi,8), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_18: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_18: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_18: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $18, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_18: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_18: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_18: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_18: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 18 ret i32 %mul } @@ -298,13 +934,52 @@ define i32 @test_mul_by_19(i32 %x) { ; X86-LABEL: test_mul_by_19: ; X86: # BB#0: -; X86-NEXT: imull $19, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,4), %eax +; X86-NEXT: shll $2, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_19: -; X64: # BB#0: -; X64-NEXT: imull $19, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_19: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: shll $2, %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_19: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: shll $2, %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_19: +; X86-NOOPT: # BB#0: +; 
X86-NOOPT-NEXT: imull $19, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_19: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_19: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_19: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $19, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_19: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 19 ret i32 %mul } @@ -317,12 +992,46 @@ ; X86-NEXT: leal (%eax,%eax,4), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_20: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: shll $2, %edi -; X64-NEXT: leal (%rdi,%rdi,4), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_20: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_20: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_20: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $20, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_20: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_20: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_20: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_20: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 20 ret i32 %mul } @@ -330,13 +1039,49 @@ define i32 @test_mul_by_21(i32 %x) { ; X86-LABEL: test_mul_by_21: ; X86: # BB#0: -; X86-NEXT: imull $21, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_21: -; X64: # BB#0: -; X64-NEXT: imull $21, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_21: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_21: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_21: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $21, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_21: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [4:1.00] 
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_21: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_21: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $21, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_21: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 21 ret i32 %mul } @@ -344,13 +1089,52 @@ define i32 @test_mul_by_22(i32 %x) { ; X86-LABEL: test_mul_by_22: ; X86: # BB#0: -; X86-NEXT: imull $22, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,4), %eax +; X86-NEXT: leal (%ecx,%eax,4), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_22: -; X64: # BB#0: -; X64-NEXT: imull $22, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_22: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_22: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_22: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $22, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_22: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_22: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_22: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $22, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_22: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 22 ret i32 %mul } @@ -358,13 +1142,52 @@ define i32 @test_mul_by_23(i32 %x) { ; X86-LABEL: test_mul_by_23: ; X86: # BB#0: -; X86-NEXT: imull $23, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %eax +; X86-NEXT: shll $3, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_23: -; X64: # BB#0: -; X64-NEXT: imull $23, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_23: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: shll $3, %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_23: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: shll $3, %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_23: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $23, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; 
HSW-NOOPT-LABEL: test_mul_by_23: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_23: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_23: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $23, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_23: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 23 ret i32 %mul } @@ -377,12 +1200,46 @@ ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_24: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: shll $3, %edi -; X64-NEXT: leal (%rdi,%rdi,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_24: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: shll $3, %edi # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_24: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: shll $3, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_24: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $24, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_24: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_24: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_24: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: shll $3, %edi # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_24: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 24 ret i32 %mul } @@ -395,12 +1252,46 @@ ; X86-NEXT: leal (%eax,%eax,4), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_25: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (%rdi,%rdi,4), %eax -; X64-NEXT: leal (%rax,%rax,4), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_25: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_25: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_25: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $25, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_25: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_25: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; 
X64-SLM-LABEL: test_mul_by_25: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rax,%rax,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_25: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 25 ret i32 %mul } @@ -408,13 +1299,52 @@ define i32 @test_mul_by_26(i32 %x) { ; X86-LABEL: test_mul_by_26: ; X86: # BB#0: -; X86-NEXT: imull $26, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_26: -; X64: # BB#0: -; X64-NEXT: imull $26, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_26: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_26: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_26: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $26, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_26: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_26: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_26: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $26, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_26: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 26 ret i32 %mul } @@ -427,12 +1357,46 @@ ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_27: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (%rdi,%rdi,8), %eax -; X64-NEXT: leal (%rax,%rax,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_27: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_27: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_27: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $27, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_27: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_27: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: 
test_mul_by_27: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rax,%rax,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_27: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 27 ret i32 %mul } @@ -440,13 +1404,52 @@ define i32 @test_mul_by_28(i32 %x) { ; X86-LABEL: test_mul_by_28: ; X86: # BB#0: -; X86-NEXT: imull $28, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_28: -; X64: # BB#0: -; X64-NEXT: imull $28, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_28: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_28: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_28: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $28, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_28: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_28: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_28: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $28, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_28: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 28 ret i32 %mul } @@ -454,13 +1457,55 @@ define i32 @test_mul_by_29(i32 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # BB#0: -; X86-NEXT: imull $29, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_29: -; X64: # BB#0: -; X64-NEXT: imull $29, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_29: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_29: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_29: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $29, {{[0-9]+}}(%esp), 
%eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_29: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_29: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_29: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $29, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_29: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 29 ret i32 %mul } @@ -468,13 +1513,53 @@ define i32 @test_mul_by_30(i32 %x) { ; X86-LABEL: test_mul_by_30: ; X86: # BB#0: -; X86-NEXT: imull $30, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shll $5, %eax +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_30: -; X64: # BB#0: -; X64-NEXT: imull $30, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_30: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_30: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_30: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $30, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_30: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_30: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_30: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $30, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_30: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 30 ret i32 %mul } @@ -488,12 +1573,46 @@ ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_31: -; X64: # BB#0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: shll $5, %eax -; X64-NEXT: subl %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_31: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_31: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_31: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $31, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_31: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: 
[1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_31: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_31: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: shll $5, %eax # sched: [1:1.00] +; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_31: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 31 ret i32 %mul } @@ -505,11 +1624,124 @@ ; X86-NEXT: shll $5, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_32: -; X64: # BB#0: -; X64-NEXT: shll $5, %edi -; X64-NEXT: movl %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_32: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shll $5, %edi # sched: [1:0.50] +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_32: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shll $5, %edi # sched: [1:0.50] +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_32: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: shll $5, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_32: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50] +; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_32: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50] +; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_32: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shll $5, %edi # sched: [1:1.00] +; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_32: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: shll $5, %edi # sched: [1:1.00] +; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 32 ret i32 %mul } + +; (x*9+42)*(x*5+2) +define i32 @test_mul_spec(i32 %x) nounwind { +; X86-LABEL: test_mul_spec: +; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal 42(%eax,%eax,8), %ecx +; X86-NEXT: leal 2(%eax,%eax,4), %eax +; X86-NEXT: imull %ecx, %eax +; X86-NEXT: retl +; +; X64-HSW-LABEL: test_mul_spec: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50] +; X64-HSW-NEXT: addl $42, %ecx # sched: [1:0.25] +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: addl $2, %eax # sched: [1:0.25] +; X64-HSW-NEXT: imull %ecx, %eax # sched: [4:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_spec: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:0.50] +; X64-JAG-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: imull %ecx, %eax # sched: [3:1.00] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_spec: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: leal 42(%eax,%eax,8), %ecx +; X86-NOOPT-NEXT: leal 2(%eax,%eax,4), %eax +; X86-NOOPT-NEXT: imull %ecx, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_spec: +; HSW-NOOPT: # BB#0: +; 
HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; HSW-NOOPT-NEXT: addl $42, %ecx # sched: [1:0.25]
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: addl $2, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: imull %ecx, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_spec:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI %EDI %RDI
+; JAG-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; JAG-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_spec:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI %EDI %RDI
+; X64-SLM-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00]
+; X64-SLM-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: imull %ecx, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_spec:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI %EDI %RDI
+; SLM-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00]
+; SLM-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i32 %x, 9
+ %add = add nsw i32 %mul, 42
+ %mul2 = mul nsw i32 %x, 5
+ %add2 = add nsw i32 %mul2, 2
+ %mul3 = mul nsw i32 %add, %add2
+ ret i32 %mul3
+}
Index: test/CodeGen/X86/mul-constant-i64.ll
===================================================================
--- test/CodeGen/X86/mul-constant-i64.ll
+++ test/CodeGen/X86/mul-constant-i64.ll
@@ -1,18 +1,55 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG
+; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT
 
-define i64 @test_mul_by_1(i64 %x) {
+define i64 @test_mul_by_1(i64 %x) nounwind {
 ; X86-LABEL: test_mul_by_1:
 ; X86: # BB#0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT: retl
 ;
-; X64-LABEL: test_mul_by_1:
-; X64: # BB#0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_1:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_1:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_1:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl 
{{[0-9]+}}(%esp), %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_1: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_1: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_1: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_1: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 1 ret i64 %mul } @@ -26,10 +63,43 @@ ; X86-NEXT: addl %eax, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_2: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_2: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_2: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_2: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOOPT-NEXT: shldl $1, %eax, %edx +; X86-NOOPT-NEXT: addl %eax, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_2: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_2: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_2: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_2: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 2 ret i64 %mul } @@ -43,10 +113,43 @@ ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_3: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi,2), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_3: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_3: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_3: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $3, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $3, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_3: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_3: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_3: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_3: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 3 ret i64 %mul } @@ -60,10 +163,43 @@ ; X86-NEXT: shll 
$2, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_4: -; X64: # BB#0: -; X64-NEXT: leaq (,%rdi,4), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_4: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_4: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_4: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOOPT-NEXT: shldl $2, %eax, %edx +; X86-NOOPT-NEXT: shll $2, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_4: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_4: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_4: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_4: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 4 ret i64 %mul } @@ -77,10 +213,43 @@ ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_5: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi,4), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_5: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_5: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_5: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $5, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $5, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_5: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_5: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_5: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_5: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 5 ret i64 %mul } @@ -95,11 +264,46 @@ ; X86-NEXT: leal (%edx,%ecx,2), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_6: -; X64: # BB#0: -; X64-NEXT: addq %rdi, %rdi -; X64-NEXT: leaq (%rdi,%rdi,2), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_6: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_6: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_6: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $6, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $6, 
{{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_6: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_6: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_6: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_6: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 6 ret i64 %mul } @@ -115,11 +319,46 @@ ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_7: -; X64: # BB#0: -; X64-NEXT: leaq (,%rdi,8), %rax -; X64-NEXT: subq %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_7: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_7: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_7: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $7, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $7, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_7: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_7: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_7: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00] +; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_7: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 7 ret i64 %mul } @@ -133,10 +372,43 @@ ; X86-NEXT: shll $3, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_8: -; X64: # BB#0: -; X64-NEXT: leaq (,%rdi,8), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_8: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_8: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_8: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOOPT-NEXT: shldl $3, %eax, %edx +; X86-NOOPT-NEXT: shll $3, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_8: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_8: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_8: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: 
[4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_8: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 8 ret i64 %mul } @@ -150,10 +422,43 @@ ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_9: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi,8), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_9: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_9: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_9: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $9, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $9, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_9: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_9: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_9: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_9: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 9 ret i64 %mul } @@ -168,11 +473,46 @@ ; X86-NEXT: leal (%edx,%ecx,2), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_10: -; X64: # BB#0: -; X64-NEXT: addq %rdi, %rdi -; X64-NEXT: leaq (%rdi,%rdi,4), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_10: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_10: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_10: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $10, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $10, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_10: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_10: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_10: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_10: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 10 ret i64 %mul } @@ -180,16 +520,53 @@ define i64 @test_mul_by_11(i64 %x) { ; X86-LABEL: test_mul_by_11: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,2), %ecx ; X86-NEXT: movl $11, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $11, {{[0-9]+}}(%esp), %ecx 
; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_11: -; X64: # BB#0: -; X64-NEXT: imulq $11, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_11: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_11: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_11: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $11, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $11, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_11: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_11: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_11: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_11: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 11 ret i64 %mul } @@ -204,11 +581,46 @@ ; X86-NEXT: leal (%edx,%ecx,4), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_12: -; X64: # BB#0: -; X64-NEXT: shlq $2, %rdi -; X64-NEXT: leaq (%rdi,%rdi,2), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_12: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_12: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_12: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $12, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $12, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_12: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_12: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_12: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00] +; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_12: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 12 ret i64 %mul } @@ -216,16 +628,53 @@ define i64 @test_mul_by_13(i64 %x) { ; X86-LABEL: test_mul_by_13: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %ecx ; X86-NEXT: movl $13, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $13, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_13: -; X64: # BB#0: -; X64-NEXT: imulq $13, %rdi, 
%rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_13: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_13: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_13: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $13, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $13, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_13: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_13: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_13: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_13: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 13 ret i64 %mul } @@ -233,16 +682,56 @@ define i64 @test_mul_by_14(i64 %x) { ; X86-LABEL: test_mul_by_14: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %ecx +; X86-NEXT: addl %eax, %ecx ; X86-NEXT: movl $14, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $14, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_14: -; X64: # BB#0: -; X64-NEXT: imulq $14, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_14: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_14: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_14: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $14, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $14, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_14: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_14: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_14: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_14: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 14 ret i64 %mul } @@ -258,11 +747,46 @@ ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_15: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi,4), %rax -; X64-NEXT: leaq (%rax,%rax,2), %rax -; X64-NEXT: 
retq +; X64-HSW-LABEL: test_mul_by_15: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_15: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_15: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $15, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $15, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_15: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_15: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_15: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_15: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 15 ret i64 %mul } @@ -276,11 +800,49 @@ ; X86-NEXT: shll $4, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_16: -; X64: # BB#0: -; X64-NEXT: shlq $4, %rdi -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_16: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shlq $4, %rdi # sched: [1:0.50] +; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_16: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shlq $4, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_16: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOOPT-NEXT: shldl $4, %eax, %edx +; X86-NOOPT-NEXT: shll $4, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_16: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50] +; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_16: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50] +; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_16: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shlq $4, %rdi # sched: [1:1.00] +; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_16: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: shlq $4, %rdi # sched: [1:1.00] +; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 16 ret i64 %mul } @@ -297,12 +859,49 @@ ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_17: -; X64: # BB#0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shlq $4, %rax -; X64-NEXT: leaq (%rax,%rdi), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_17: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50] 
+; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_17: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: shlq $4, %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_17: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $17, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $17, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_17: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_17: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_17: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: shlq $4, %rax # sched: [1:1.00] +; X64-SLM-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_17: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 17 ret i64 %mul } @@ -317,11 +916,46 @@ ; X86-NEXT: leal (%edx,%ecx,2), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_18: -; X64: # BB#0: -; X64-NEXT: addq %rdi, %rdi -; X64-NEXT: leaq (%rdi,%rdi,8), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_18: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_18: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_18: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $18, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $18, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_18: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_18: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_18: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_18: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 18 ret i64 %mul } @@ -329,16 +963,56 @@ define i64 @test_mul_by_19(i64 %x) { ; X86-LABEL: test_mul_by_19: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: shll $2, %ecx +; X86-NEXT: subl %eax, %ecx ; X86-NEXT: movl $19, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $19, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_19: -; X64: # BB#0: -; X64-NEXT: imulq $19, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_19: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: shlq $2, 
%rax # sched: [1:0.50] +; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_19: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: shlq $2, %rax # sched: [1:0.50] +; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_19: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $19, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $19, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_19: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_19: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_19: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_19: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 19 ret i64 %mul } @@ -353,11 +1027,46 @@ ; X86-NEXT: leal (%edx,%ecx,4), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_20: -; X64: # BB#0: -; X64-NEXT: shlq $2, %rdi -; X64-NEXT: leaq (%rdi,%rdi,4), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_20: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_20: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_20: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $20, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $20, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_20: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_20: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_20: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00] +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_20: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 20 ret i64 %mul } @@ -365,16 +1074,53 @@ define i64 @test_mul_by_21(i64 %x) { ; X86-LABEL: test_mul_by_21: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %ecx ; X86-NEXT: movl $21, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $21, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_21: -; X64: # BB#0: -; X64-NEXT: imulq $21, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_21: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; 
X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_21: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_21: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $21, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $21, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_21: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_21: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_21: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_21: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 21 ret i64 %mul } @@ -382,16 +1128,56 @@ define i64 @test_mul_by_22(i64 %x) { ; X86-LABEL: test_mul_by_22: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %ecx +; X86-NEXT: addl %eax, %ecx ; X86-NEXT: movl $22, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $22, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_22: -; X64: # BB#0: -; X64-NEXT: imulq $22, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_22: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_22: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_22: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $22, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $22, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_22: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_22: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_22: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_22: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 22 ret i64 %mul } @@ -399,16 +1185,56 @@ define i64 @test_mul_by_23(i64 %x) { ; X86-LABEL: test_mul_by_23: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: shll $3, %ecx +; X86-NEXT: subl %eax, %ecx ; X86-NEXT: movl $23, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $23, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; 
X64-LABEL: test_mul_by_23:
-; X64: # BB#0:
-; X64-NEXT: imulq $23, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_23:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: shlq $3, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_23:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: shlq $3, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_23:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $23, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $23, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_23:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_23:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_23:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_23:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
 %mul = mul nsw i64 %x, 23
 ret i64 %mul
 }
@@ -423,11 +1249,46 @@
 ; X86-NEXT: leal (%edx,%ecx,8), %edx
 ; X86-NEXT: retl
 ;
-; X64-LABEL: test_mul_by_24:
-; X64: # BB#0:
-; X64-NEXT: shlq $3, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_24:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shlq $3, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_24:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shlq $3, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_24:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $24, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $24, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_24:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_24:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_24:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shlq $3, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_24:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
 %mul = mul nsw i64 %x, 24
 ret i64 %mul
 }
@@ -443,11 +1304,46 @@
 ; X86-NEXT: addl %ecx, %edx
 ; X86-NEXT: retl
 ;
-; X64-LABEL: test_mul_by_25:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,4), %rax
-; X64-NEXT: leaq (%rax,%rax,4), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_25:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_25:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_25:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $25, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $25, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_25:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_25:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_25:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_25:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
 %mul = mul nsw i64 %x, 25
 ret i64 %mul
 }
@@ -455,16 +1351,56 @@
 define i64 @test_mul_by_26(i64 %x) {
 ; X86-LABEL: test_mul_by_26:
 ; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: subl %eax, %ecx
 ; X86-NEXT: movl $26, %eax
 ; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $26, {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: addl %ecx, %edx
 ; X86-NEXT: retl
 ;
-; X64-LABEL: test_mul_by_26:
-; X64: # BB#0:
-; X64-NEXT: imulq $26, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_26:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_26:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_26:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $26, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $26, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_26:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_26:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_26:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_26:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
 %mul = mul nsw i64 %x, 26
 ret i64 %mul
 }
@@ -480,11 +1416,46 @@
 ; X86-NEXT: addl %ecx, %edx
 ; X86-NEXT: retl
 ;
-; X64-LABEL: test_mul_by_27:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,8), %rax
-; X64-NEXT: leaq (%rax,%rax,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_27:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_27:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_27:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $27, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $27, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_27:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_27:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_27:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_27:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
 %mul = mul nsw i64 %x, 27
 ret i64 %mul
 }
@@ -492,16 +1463,56 @@
 define i64 @test_mul_by_28(i64 %x) {
 ; X86-LABEL: test_mul_by_28:
 ; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: addl %eax, %ecx
 ; X86-NEXT: movl $28, %eax
 ; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $28, {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: addl %ecx, %edx
 ; X86-NEXT: retl
 ;
-; X64-LABEL: test_mul_by_28:
-; X64: # BB#0:
-; X64-NEXT: imulq $28, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_28:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_28:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_28:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $28, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $28, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_28:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_28:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_28:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_28:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
 %mul = mul nsw i64 %x, 28
 ret i64 %mul
 }
@@ -509,16 +1520,59 @@
 define i64 @test_mul_by_29(i64 %x) {
 ; X86-LABEL: test_mul_by_29:
 ; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: addl %eax, %ecx
 ; X86-NEXT: movl $29, %eax
 ; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $29, {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: addl %ecx, %edx
 ; X86-NEXT: retl
 ;
-; X64-LABEL: test_mul_by_29:
-; X64: # BB#0:
-; X64-NEXT: imulq $29, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_29:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_29:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_29:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $29, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $29, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_29:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_29:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_29:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_29:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
 %mul = mul nsw i64 %x, 29
 ret i64 %mul
 }
@@ -526,16 +1580,59 @@
 define i64 @test_mul_by_30(i64 %x) {
 ; X86-LABEL: test_mul_by_30:
 ; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $5, %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: subl %eax, %ecx
 ; X86-NEXT: movl $30, %eax
 ; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $30, {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: addl %ecx, %edx
 ; X86-NEXT: retl
 ;
-; X64-LABEL: test_mul_by_30:
-; X64: # BB#0:
-; X64-NEXT: imulq $30, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_30:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_30:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_30:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $30, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $30, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_30:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_30:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_30:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_30:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
 %mul = mul nsw i64 %x, 30
 ret i64 %mul
 }
@@ -552,12 +1649,49 @@
 ; X86-NEXT: addl %ecx, %edx
 ; X86-NEXT: retl
 ;
-; X64-LABEL: test_mul_by_31:
-; X64: # BB#0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shlq $5, %rax
-; X64-NEXT: subq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_31:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_31:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_31:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $31, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $31, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_31:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_31:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_31:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: shlq $5, %rax # sched: [1:1.00]
+; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_31:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
 %mul = mul nsw i64 %x, 31
 ret i64 %mul
 }
@@ -571,11 +1705,168 @@
 ; X86-NEXT: shll $5, %eax
 ; X86-NEXT: retl
 ;
-; X64-LABEL: test_mul_by_32:
-; X64: # BB#0:
-; X64-NEXT: shlq $5, %rdi
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_32:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shlq $5, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_32:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shlq $5, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_32:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: shldl $5, %eax, %edx
+; X86-NOOPT-NEXT: shll $5, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_32:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50]
+; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_32:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50]
+; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_32:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shlq $5, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_32:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: shlq $5, %rdi # sched: [1:1.00]
+; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
 %mul = mul nsw i64 %x, 32
 ret i64 %mul
 }
+
+; (x*9+42)*(x*5+2)
+define i64 @test_mul_spec(i64 %x) nounwind {
+; X86-LABEL: test_mul_spec:
+; X86: # BB#0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl $9, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: leal (%edi,%edi,8), %ebx
+; X86-NEXT: addl $42, %esi
+; X86-NEXT: adcl %edx, %ebx
+; X86-NEXT: movl $5, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: leal (%edi,%edi,4), %edi
+; X86-NEXT: addl $2, %ecx
+; X86-NEXT: adcl %edx, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: imull %esi, %edi
+; X86-NEXT: addl %edi, %edx
+; X86-NEXT: imull %ebx, %ecx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_spec:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; X64-HSW-NEXT: addq $42, %rcx # sched: [1:0.25]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq $2, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_spec:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; X64-JAG-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_spec:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: pushl %ebx
+; X86-NOOPT-NEXT: pushl %edi
+; X86-NOOPT-NEXT: pushl %esi
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NOOPT-NEXT: movl $9, %edx
+; X86-NOOPT-NEXT: movl %ecx, %eax
+; X86-NOOPT-NEXT: mull %edx
+; X86-NOOPT-NEXT: movl %eax, %esi
+; X86-NOOPT-NEXT: leal (%edi,%edi,8), %ebx
+; X86-NOOPT-NEXT: addl $42, %esi
+; X86-NOOPT-NEXT: adcl %edx, %ebx
+; X86-NOOPT-NEXT: movl $5, %edx
+; X86-NOOPT-NEXT: movl %ecx, %eax
+; X86-NOOPT-NEXT: mull %edx
+; X86-NOOPT-NEXT: movl %eax, %ecx
+; X86-NOOPT-NEXT: leal (%edi,%edi,4), %edi
+; X86-NOOPT-NEXT: addl $2, %ecx
+; X86-NOOPT-NEXT: adcl %edx, %edi
+; X86-NOOPT-NEXT: movl %esi, %eax
+; X86-NOOPT-NEXT: mull %ecx
+; X86-NOOPT-NEXT: imull %esi, %edi
+; X86-NOOPT-NEXT: addl %edi, %edx
+; X86-NOOPT-NEXT: imull %ebx, %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: popl %esi
+; X86-NOOPT-NEXT: popl %edi
+; X86-NOOPT-NEXT: popl %ebx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_spec:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; HSW-NOOPT-NEXT: addq $42, %rcx # sched: [1:0.25]
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: addq $2, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_spec:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; JAG-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_spec:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00]
+; X64-SLM-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_spec:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00]
+; SLM-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i64 %x, 9
+ %add = add nsw i64 %mul, 42
+ %mul2 = mul nsw i64 %x, 5
+ %add2 = add nsw i64 %mul2, 2
+ %mul3 = mul nsw i64 %add, %add2
+ ret i64 %mul3
+}
Index: test/CodeGen/X86/mul-constant-result.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/mul-constant-result.ll
@@ -0,0 +1,1291 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell | FileCheck %s --check-prefix=X64-HSW
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @mult(i32, i32) local_unnamed_addr #0 {
+; X86-LABEL: mult:
+; X86: # BB#0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: .Lcfi0:
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .Lcfi1:
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: movl $1, %eax
+; X86-NEXT: movl $1, %esi
+; X86-NEXT: jg .LBB0_2
+; X86-NEXT: # BB#1:
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: .LBB0_2:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: je .LBB0_4
+; X86-NEXT: # BB#3:
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: .LBB0_4:
+; X86-NEXT: decl %ecx
+; X86-NEXT: cmpl $31, %ecx
+; X86-NEXT: ja .LBB0_39
+; X86-NEXT: # BB#5:
+; X86-NEXT: jmpl *.LJTI0_0(,%ecx,4)
+; X86-NEXT: .LBB0_6:
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_39:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: .LBB0_40:
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_7:
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_8:
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_9:
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_10:
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_11:
+; X86-NEXT: leal (,%eax,8), %ecx
+; X86-NEXT: jmp .LBB0_12
+; X86-NEXT: .LBB0_13:
+; X86-NEXT: shll $3, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_14:
+; X86-NEXT: leal (%eax,%eax,8), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_15:
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_16:
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_17:
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_18:
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_19:
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: jmp .LBB0_20
+; X86-NEXT: .LBB0_21:
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_22:
+; X86-NEXT: shll $4, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_23:
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $4, %ecx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_24:
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: leal (%eax,%eax,8), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_25:
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: shll $2, %ecx
+; X86-NEXT: jmp .LBB0_12
+; X86-NEXT: .LBB0_26:
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_27:
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_28:
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: .LBB0_20:
+; X86-NEXT: leal (%eax,%ecx,4), %ecx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_29:
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: shll $3, %ecx
+; X86-NEXT: jmp .LBB0_12
+; X86-NEXT: .LBB0_30:
+; X86-NEXT: shll $3, %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_31:
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_32:
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: jmp .LBB0_12
+; X86-NEXT: .LBB0_33:
+; X86-NEXT: leal (%eax,%eax,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_34:
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_35:
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_36:
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $5, %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: jmp .LBB0_12
+; X86-NEXT: .LBB0_37:
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $5, %ecx
+; X86-NEXT: .LBB0_12:
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_38:
+; X86-NEXT: shll $5, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: mult:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI %EDI %RDI
+; X64-HSW-NEXT: cmpl $1, %esi
+; X64-HSW-NEXT: movl $1, %ecx
+; X64-HSW-NEXT: movl %esi, %eax
+; X64-HSW-NEXT: cmovgl %ecx, %eax
+; X64-HSW-NEXT: testl %esi, %esi
+; X64-HSW-NEXT: cmovel %ecx, %eax
+; X64-HSW-NEXT: addl $-1, %edi
+; X64-HSW-NEXT: cmpl $31, %edi
+; X64-HSW-NEXT: ja .LBB0_36
+; X64-HSW-NEXT: # BB#1:
+; X64-HSW-NEXT: jmpq *.LJTI0_0(,%rdi,8)
+; X64-HSW-NEXT: .LBB0_2:
+; X64-HSW-NEXT: addl %eax, %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_36:
+; X64-HSW-NEXT: xorl %eax, %eax
+; X64-HSW-NEXT: .LBB0_37:
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_3:
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_4:
+; X64-HSW-NEXT: shll $2, %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_5:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_6:
+; X64-HSW-NEXT: addl %eax, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_7:
+; X64-HSW-NEXT: leal (,%rax,8), %ecx
+; X64-HSW-NEXT: jmp .LBB0_8
+; X64-HSW-NEXT: .LBB0_9:
+; X64-HSW-NEXT: shll $3, %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_10:
+; X64-HSW-NEXT: leal (%rax,%rax,8), %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_11:
+; X64-HSW-NEXT: addl %eax, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_12:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
+; X64-HSW-NEXT: leal (%rax,%rcx,2), %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_13:
+; X64-HSW-NEXT: shll $2, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_14:
+; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx
+; X64-HSW-NEXT: leal (%rax,%rcx,4), %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_15:
+; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx
+; X64-HSW-NEXT: jmp .LBB0_16
+; X64-HSW-NEXT: .LBB0_18:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_19:
+; X64-HSW-NEXT: shll $4, %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_20:
+; X64-HSW-NEXT: movl %eax, %ecx
+; X64-HSW-NEXT: shll $4, %ecx
+; X64-HSW-NEXT: jmp .LBB0_17
+; X64-HSW-NEXT: .LBB0_21:
+; X64-HSW-NEXT: addl %eax, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,8), %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_22:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
+; X64-HSW-NEXT: shll $2, %ecx
+; X64-HSW-NEXT: jmp .LBB0_8
+; X64-HSW-NEXT: .LBB0_23:
+; X64-HSW-NEXT: shll $2, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_24:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
+; X64-HSW-NEXT: leal (%rax,%rcx,4), %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_25:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
+; X64-HSW-NEXT: .LBB0_16:
+; X64-HSW-NEXT: leal (%rax,%rcx,4), %ecx
+; X64-HSW-NEXT: jmp .LBB0_17
+; X64-HSW-NEXT: .LBB0_26:
+; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx
+; X64-HSW-NEXT: shll $3, %ecx
+; X64-HSW-NEXT: jmp .LBB0_8
+; X64-HSW-NEXT: .LBB0_27:
+; X64-HSW-NEXT: shll $3, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_28:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_29:
+; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
+; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
+; X64-HSW-NEXT: jmp .LBB0_8
+; X64-HSW-NEXT: .LBB0_30:
+; X64-HSW-NEXT: leal (%rax,%rax,8), %eax
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_31:
+; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
+; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
+; X64-HSW-NEXT: jmp .LBB0_17
+; X64-HSW-NEXT: .LBB0_32:
+; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
+; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
+; X64-HSW-NEXT: addl %eax, %ecx
+; X64-HSW-NEXT: .LBB0_17:
+; X64-HSW-NEXT: addl %eax, %ecx
+; X64-HSW-NEXT: movl %ecx, %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_33:
+; X64-HSW-NEXT: movl %eax, %ecx
+; X64-HSW-NEXT: shll $5, %ecx
+; X64-HSW-NEXT: subl %eax, %ecx
+; X64-HSW-NEXT: jmp .LBB0_8
+; X64-HSW-NEXT: .LBB0_34:
+; X64-HSW-NEXT: movl %eax, %ecx
+; X64-HSW-NEXT: shll $5, %ecx
+; X64-HSW-NEXT: .LBB0_8:
+; X64-HSW-NEXT: subl %eax, %ecx
+; X64-HSW-NEXT: movl %ecx, %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_35:
+; X64-HSW-NEXT: shll $5, %eax
+; X64-HSW-NEXT: # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT: retq
+ %3 = icmp eq i32 %1, 0
+ %4 = icmp sgt i32 %1, 1
+ %5 = or i1 %3, %4
+ %6 = select i1 %5, i32 1, i32 %1
+ switch i32 %0, label %69 [
+ i32 1, label %70
+ i32 2, label %7
+ i32 3, label %9
+ i32 4, label %11
+ i32 5, label %13
+ i32 6, label %15
+ i32 7, label %17
+ i32 8, label %19
+ i32 9, label %21
+ i32 10, label %23
+ i32 11, label %25
+ i32 12, label %27
+ i32 13, label %29
+ i32 14, label %31
+ i32 15, label %33
+ i32 16, label %35
+ i32 17, label %37
+ i32 18, label %39
+ i32 19, label %41
+ i32 20, label %43
+ i32 21, label %45
+ i32 22, label %47
+ i32 23, label %49
+ i32 24, label %51
+ i32 25, label %53
+ i32 26, label %55
+ i32 27, label %57
+ i32 28, label %59
+ i32 29, label %61
+ i32 30, label %63
+ i32 31, label %65
+ i32 32, label %67
+ ]
+
+;