diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1193,7 +1193,8 @@ /// case is to provision the cost of vectorization/scalarization in /// vectorizer passes. InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index = -1) const; + unsigned Index = -1, Value *Op0 = nullptr, + Value *Op1 = nullptr) const; /// \return The expected cost of vector Insert and Extract. /// This is used when instruction is available, and implementation @@ -1786,7 +1787,8 @@ TTI::TargetCostKind CostKind, const Instruction *I) = 0; virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) = 0; + unsigned Index, Value *Op0, + Value *Op1) = 0; virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, unsigned Index) = 0; @@ -2358,9 +2360,9 @@ const Instruction *I) override { return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); } - InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) override { - return Impl.getVectorInstrCost(Opcode, Val, Index); + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index, + Value *Op0, Value *Op1) override { + return Impl.getVectorInstrCost(Opcode, Val, Index, Op0, Op1); } InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, unsigned Index) override { diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -585,8 +585,8 @@ return 1; } - InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const { + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index, + Value *Op0, Value *Op1) const { return 1; 
} diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -90,10 +90,12 @@ InstructionCost Cost = 0; // Broadcast cost is equal to the cost of extracting the zero'th element // plus the cost of inserting it into every element of the result vector. - Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0); + Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0, + nullptr, nullptr); for (int i = 0, e = VTy->getNumElements(); i < e; ++i) { - Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i); + Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i, + nullptr, nullptr); } return Cost; } @@ -110,8 +112,10 @@ // vector and finally index 3 of second vector and insert them at index // <0,1,2,3> of result vector. for (int i = 0, e = VTy->getNumElements(); i < e; ++i) { - Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i); - Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i); + Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i, + nullptr, nullptr); + Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i, + nullptr, nullptr); } return Cost; } @@ -134,9 +138,9 @@ // type. for (int i = 0; i != NumSubElts; ++i) { Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, - i + Index); - Cost += - thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i); + i + Index, nullptr, nullptr); + Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i, + nullptr, nullptr); } return Cost; } @@ -158,10 +162,10 @@ // the source type plus the cost of inserting them into the result vector // type. 
for (int i = 0; i != NumSubElts; ++i) { - Cost += - thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy, i); + Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy, + i, nullptr, nullptr); Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, - i + Index); + i + Index, nullptr, nullptr); } return Cost; } @@ -212,7 +216,7 @@ FixedVectorType::get( PointerType::get(VT->getElementType(), 0), VT->getNumElements()), - -1) + -1, nullptr, nullptr) : 0; InstructionCost LoadCost = VT->getNumElements() * @@ -237,7 +241,7 @@ Instruction::ExtractElement, FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()), VT->getNumElements()), - -1) + + -1, nullptr, nullptr) + getCFInstrCost(Instruction::Br, CostKind) + getCFInstrCost(Instruction::PHI, CostKind)); } @@ -722,9 +726,11 @@ if (!DemandedElts[i]) continue; if (Insert) - Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i); + Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i, + nullptr, nullptr); if (Extract) - Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i); + Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i, + nullptr, nullptr); } return Cost; @@ -1123,7 +1129,7 @@ InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) { return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy, - Index) + + Index, nullptr, nullptr) + thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(), TTI::CastContextHint::None, TTI::TCK_RecipThroughput); @@ -1184,14 +1190,20 @@ return 1; } - InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) { + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index, + Value *Op0, Value *Op1) { return getRegUsageForType(Val->getScalarType()); } InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, unsigned Index) { - return 
thisT()->getVectorInstrCost(I.getOpcode(), Val, Index); + Value *Op0 = nullptr; + Value *Op1 = nullptr; + if (auto *IE = dyn_cast(&I)) { + Op0 = IE->getOperand(0); + Op1 = IE->getOperand(1); + } + return thisT()->getVectorInstrCost(I.getOpcode(), Val, Index, Op0, Op1); } InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, @@ -2246,7 +2258,8 @@ ArithCost += NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind); return ShuffleCost + ArithCost + - thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); + thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0, + nullptr, nullptr); } /// Try to calculate the cost of performing strict (in-order) reductions, @@ -2353,7 +2366,8 @@ // The last min/max should be in vector registers and we counted it above. // So just need a single extractelement. return ShuffleCost + MinMaxCost + - thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); + thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0, + nullptr, nullptr); } InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -897,13 +897,13 @@ return Cost; } -InstructionCost TargetTransformInfo::getVectorInstrCost(unsigned Opcode, - Type *Val, - unsigned Index) const { +InstructionCost TargetTransformInfo::getVectorInstrCost( + unsigned Opcode, Type *Val, unsigned Index, Value *Op0, Value *Op1) const { // FIXME: Assert that Opcode is either InsertElement or ExtractElement. // This is mentioned in the interface description and respected by all // callers, but never asserted upon. 
- InstructionCost Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index); + InstructionCost Cost = + TTIImpl->getVectorInstrCost(Opcode, Val, Index, Op0, Op1); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -169,8 +169,8 @@ InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index); + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index, + Value *Op0, Value *Op1); InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, unsigned Index); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2034,8 +2034,8 @@ // Get the cost for the extract. We compute the cost (if any) for the extend // below. - InstructionCost Cost = - getVectorInstrCost(Instruction::ExtractElement, VecTy, Index); + InstructionCost Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, + Index, nullptr, nullptr); // Legalize the types. 
auto VecLT = getTypeLegalizationCost(VecTy); @@ -2128,7 +2128,8 @@ } InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) { + unsigned Index, Value *Op0, + Value *Op1) { return getVectorInstrCostHelper(Val, Index, false /* HasRealUse */); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -162,7 +162,7 @@ using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, - unsigned Index); + unsigned Index, Value *Op0, Value *Op1); bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const; bool isSourceOfDivergence(const Value *V) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -790,7 +790,8 @@ } InstructionCost GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, - unsigned Index) { + unsigned Index, Value *Op0, + Value *Op1) { switch (Opcode) { case Instruction::ExtractElement: case Instruction::InsertElement: { @@ -799,7 +800,7 @@ if (EltSize < 32) { if (EltSize == 16 && Index == 0 && ST->has16BitInsts()) return 0; - return BaseT::getVectorInstrCost(Opcode, ValTy, Index); + return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1); } // Extracts are just reads of a subregister, so are free. Inserts are @@ -810,7 +811,7 @@ return Index == ~0u ? 
2 : 0; } default: - return BaseT::getVectorInstrCost(Opcode, ValTy, Index); + return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1); } } diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h --- a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h @@ -62,7 +62,7 @@ const Instruction *I = nullptr); using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, - unsigned Index); + unsigned Index, Value *Op0, Value *Op1); }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp @@ -108,14 +108,15 @@ } InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, - unsigned Index) { + unsigned Index, Value *Op0, + Value *Op1) { switch (Opcode) { case Instruction::ExtractElement: case Instruction::InsertElement: { unsigned EltSize = DL.getTypeSizeInBits(cast(ValTy)->getElementType()); if (EltSize < 32) { - return BaseT::getVectorInstrCost(Opcode, ValTy, Index); + return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1); } // Extracts are just reads of a subregister, so are free. Inserts are @@ -126,7 +127,7 @@ return Index == ~0u ? 
2 : 0; } default: - return BaseT::getVectorInstrCost(Opcode, ValTy, Index); + return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1); } } diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -240,8 +240,8 @@ const Instruction *I = nullptr); using BaseT::getVectorInstrCost; - InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index); + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index, + Value *Op0, Value *Op1); InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE, const SCEV *Ptr); diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -874,7 +874,8 @@ } InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, - unsigned Index) { + unsigned Index, Value *Op0, + Value *Op1) { // Penalize inserting into an D-subregister. We end up with a three times // lower estimated throughput on swift. if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement && @@ -893,7 +894,7 @@ if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32) return std::max( - BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U); + BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1), 2U); } if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement || @@ -906,7 +907,7 @@ return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 
4 : 1); } - return BaseT::getVectorInstrCost(Opcode, ValTy, Index); + return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1); } InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -154,8 +154,8 @@ TTI::TargetCostKind CostKind, const Instruction *I = nullptr); using BaseT::getVectorInstrCost; - InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index); + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index, + Value *Op0, Value *Op1); InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr) { diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -329,7 +329,8 @@ } InstructionCost HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) { + unsigned Index, Value *Op0, + Value *Op1) { Type *ElemTy = Val->isVectorTy() ? cast(Val)->getElementType() : Val; if (Opcode == Instruction::InsertElement) { @@ -338,7 +339,8 @@ if (ElemTy->isIntegerTy(32)) return Cost; // If it's not a 32-bit value, there will need to be an extract. 
- return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, Index); + return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, Index, + Op0, Op1); } if (Opcode == Instruction::ExtractElement) diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -126,8 +126,8 @@ TTI::TargetCostKind CostKind, const Instruction *I = nullptr); using BaseT::getVectorInstrCost; - InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index); + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index, + Value *Op0, Value *Op1); InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -717,7 +717,8 @@ } InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) { + unsigned Index, Value *Op0, + Value *Op1) { assert(Val->isVectorTy() && "This must be a vector type"); int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -727,7 +728,8 @@ if (!CostFactor.isValid()) return InstructionCost::getMax(); - InstructionCost Cost = BaseT::getVectorInstrCost(Opcode, Val, Index); + InstructionCost Cost = + BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1); Cost *= CostFactor; if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) { @@ -869,7 +871,8 @@ if (Src->isVectorTy() && Opcode == Instruction::Store) for (int i = 0, e = cast(Src)->getNumElements(); i < e; ++i) - Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i); + Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i, nullptr, + nullptr); return Cost; } 
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -157,8 +157,8 @@ const Instruction *I = nullptr); using BaseT::getVectorInstrCost; - InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index); + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index, + Value *Op0, Value *Op1); InstructionCost getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1131,12 +1131,13 @@ } InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) { + unsigned Index, Value *Op0, + Value *Op1) { assert(Val->isVectorTy() && "This must be a vector type"); if (Opcode != Instruction::ExtractElement && Opcode != Instruction::InsertElement) - return BaseT::getVectorInstrCost(Opcode, Val, Index); + return BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1); // Legalize the type. std::pair LT = getTypeLegalizationCost(Val); @@ -1150,7 +1151,7 @@ return LT.first; if (!isTypeLegal(Val)) - return BaseT::getVectorInstrCost(Opcode, Val, Index); + return BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1); // In RVV, we could use vslidedown + vmv.x.s to extract element from vector // and vslideup + vmv.s.x to insert element to vector. 
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -107,8 +107,8 @@ TTI::TargetCostKind CostKind, const Instruction *I = nullptr); using BaseT::getVectorInstrCost; - InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index); + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index, + Value *Op0, Value *Op1); bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue); InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -996,7 +996,8 @@ } InstructionCost SystemZTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) { + unsigned Index, Value *Op0, + Value *Op1) { // vlvgp will insert two grs into a vector register, so only count half the // number of instructions. if (Opcode == Instruction::InsertElement && Val->isIntOrIntVectorTy(64)) @@ -1012,7 +1013,7 @@ return Cost; } - return BaseT::getVectorInstrCost(Opcode, Val, Index); + return BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1); } // Check if a load may be folded as a memory operand in its user. 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -66,8 +66,8 @@ ArrayRef Args = ArrayRef(), const Instruction *CxtI = nullptr); using BaseT::getVectorInstrCost; - InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index); + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index, + Value *Op0, Value *Op1); /// @} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -82,9 +82,10 @@ InstructionCost WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) { + unsigned Index, + Value *Op0, Value *Op1) { InstructionCost Cost = - BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index); + BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index, Op0, Op1); // SIMD128's insert/extract currently only take constant indices. 
if (Index == -1u) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -147,8 +147,8 @@ TTI::TargetCostKind CostKind, const Instruction *I = nullptr); using BaseT::getVectorInstrCost; - InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index); + InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index, + Value *Op0, Value *Op1); InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -4257,7 +4257,8 @@ } InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) { + unsigned Index, Value *Op0, + Value *Op1) { static const CostTblEntry SLMCostTbl[] = { { ISD::EXTRACT_VECTOR_ELT, MVT::i8, 4 }, { ISD::EXTRACT_VECTOR_ELT, MVT::i16, 4 }, @@ -4337,6 +4338,18 @@ if (ScalarType->isFloatingPointTy()) return RegisterFileMoveCost; + if (Opcode == Instruction::InsertElement && + isa_and_nonnull(Op0)) { + // Consider the gather cost to be cheap. + if (isa_and_nonnull(Op1)) + return RegisterFileMoveCost; + // mov constant-to-GPR + movd/movq GPR -> XMM. + if (isa_and_nonnull(Op1) && Op1->getType()->isIntegerTy()) + return 2 + RegisterFileMoveCost; + // Assume movd/movq GPR -> XMM is relatively cheap on all targets. + return 1 + RegisterFileMoveCost; + } + // Assume movd/movq XMM -> GPR is relatively cheap on all targets. 
if (ScalarType->isIntegerTy() && Opcode == Instruction::ExtractElement) return 1 + RegisterFileMoveCost; @@ -4383,7 +4396,8 @@ if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy()) RegisterFileMoveCost += 1; - return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost; + return BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1) + + RegisterFileMoveCost; } InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty, @@ -5155,7 +5169,8 @@ } // Add the final extract element to the cost. - return ReductionCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0); + return ReductionCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0, + nullptr, nullptr); } InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy, @@ -5455,7 +5470,8 @@ } // Add the final extract element to the cost. - return MinMaxCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0); + return MinMaxCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0, + nullptr, nullptr); } /// Calculate the cost of materializing a 64-bit value. This helper diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6724,9 +6724,24 @@ // broadcast. assert(VecTy == FinalVecTy && "No reused scalars expected for broadcast."); - return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, - /*Mask=*/std::nullopt, CostKind, /*Index=*/0, - /*SubTp=*/nullptr, /*Args=*/VL[0]); + const auto *It = + find_if(VL, [](Value *V) { return !isa(V); }); + // If all values are undefs - consider cost free. + if (It == VL.end()) + return TTI::TCC_Free; + // Add broadcast for non-identity shuffle only. 
+ bool NeedShuffle = + VL.front() != *It || !all_of(VL.drop_front(), UndefValue::classof); + InstructionCost InsertCost = + TTI->getVectorInstrCost(Instruction::InsertElement, VecTy, + /*Index=*/0, PoisonValue::get(VecTy), *It); + return InsertCost + (NeedShuffle + ? TTI->getShuffleCost( + TargetTransformInfo::SK_Broadcast, VecTy, + /*Mask=*/std::nullopt, CostKind, + /*Index=*/0, + /*SubTp=*/nullptr, /*Args=*/VL[0]) + : TTI::TCC_Free); } InstructionCost ReuseShuffleCost = 0; if (NeedToShuffleReuses) diff --git a/llvm/test/Analysis/CostModel/X86/loop_v2-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/loop_v2-inseltpoison.ll --- a/llvm/test/Analysis/CostModel/X86/loop_v2-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/loop_v2-inseltpoison.ll @@ -20,7 +20,7 @@ %5 = extractelement <2 x i64> %2, i32 1 %6 = getelementptr inbounds i32, ptr %A, i64 %5 %7 = load i32, ptr %4, align 4 - ;CHECK: cost of 1 {{.*}} insert + ;CHECK: cost of 0 {{.*}} insert %8 = insertelement <2 x i32> poison, i32 %7, i32 0 %9 = load i32, ptr %6, align 4 ;CHECK: cost of 1 {{.*}} insert diff --git a/llvm/test/Analysis/CostModel/X86/loop_v2.ll b/llvm/test/Analysis/CostModel/X86/loop_v2.ll --- a/llvm/test/Analysis/CostModel/X86/loop_v2.ll +++ b/llvm/test/Analysis/CostModel/X86/loop_v2.ll @@ -20,7 +20,7 @@ %5 = extractelement <2 x i64> %2, i32 1 %6 = getelementptr inbounds i32, ptr %A, i64 %5 %7 = load i32, ptr %4, align 4 - ;CHECK: cost of 1 {{.*}} insert + ;CHECK: cost of 0 {{.*}} insert %8 = insertelement <2 x i32> undef, i32 %7, i32 0 %9 = load i32, ptr %6, align 4 ;CHECK: cost of 1 {{.*}} insert diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll @@ -1907,7 +1907,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 
x i32> %ind) { ; SSE2-LABEL: 'test_gather_16f32_const_mask2' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind @@ -1966,7 +1966,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) { ; SSE2-LABEL: 'test_scatter_16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -1907,7 +1907,7 @@ define <16 x 
float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE2-LABEL: 'test_gather_16f32_const_mask2' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind @@ -1966,7 +1966,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) { ; SSE2-LABEL: 'test_scatter_16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> diff --git a/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll --- a/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll +++ 
b/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll @@ -294,71 +294,71 @@ ; ; SSE4-LABEL: 'insert_i64' ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 ; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 ; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 ; SSE4-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'insert_i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 
= insertelement <8 x i64> poison, i64 undef, i32 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'insert_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_4 = 
insertelement <8 x i64> poison, i64 undef, i32 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'insert_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, 
i32 3 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'insert_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 ; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -382,153 +382,153 @@ define i32 @insert_i32(i32 %arg) { ; SSE2-LABEL: 'insert_i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'insert_i32' ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 ; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 ; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'insert_i32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 
for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE4-LABEL: 'insert_i32' ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 ; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 ; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'insert_i32' ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_a = insertelement <8 x i32> 
poison, i32 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'insert_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg -; 
AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 
undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'insert_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 ; 
SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'insert_i32' ; GLM-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 ; GLM-NEXT: Cost Model: Found an estimated cost of 9 for 
instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -558,76 +558,76 @@ define i32 @insert_i16(i32 %arg) { ; SSE-LABEL: 'insert_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 ; SSE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7 ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15 ; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = insertelement <32 x i16> poison, i16 undef, i32 15 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = insertelement <32 x i16> poison, i16 undef, i32 31 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'insert_i16' ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_15 = insertelement <32 x i16> poison, i16 undef, i32 15 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_31 = insertelement <32 x i16> poison, i16 undef, i32 31 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'insert_i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 ; AVX512-NEXT: Cost Model: Found an estimated 
cost of 3 for instruction: %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_15 = insertelement <32 x i16> poison, i16 undef, i32 15 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_31 = insertelement <32 x i16> poison, i16 undef, i32 31 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -664,265 +664,265 @@ define i32 @insert_i8(i32 %arg) { ; SSE2-LABEL: 'insert_i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = 
insertelement <2 x i8> poison, i8 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 ; 
SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 ; SSE2-NEXT: Cost 
Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'insert_i8' ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for 
instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = 
insertelement <32 x i8> poison, i8 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 ; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = 
insertelement <64 x i8> poison, i8 undef, i32 63 ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'insert_i8' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 ; SSSE3-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE4-LABEL: 'insert_i8' ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 ; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 
x i8> poison, i8 undef, i32 7 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 ; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, 
i32 32 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'insert_i8' ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> 
poison, i8 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'insert_i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 ; AVX512-NEXT: Cost Model: 
Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 -; AVX512-NEXT: Cost Model: Found 
an estimated cost of 3 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'insert_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement 
<8 x i8> poison, i8 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg -; SLM-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'insert_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg -; 
GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 ; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 -; GLM-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -971,31 +971,31 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 
undef, i32 4 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -1004,31 +1004,31 @@ ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated 
cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = 
insertelement <32 x i1> poison, i1 undef, i32 7 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = 
insertelement <64 x i1> poison, i1 undef, i32 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -1037,156 +1037,156 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 ; SSSE3-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE4-LABEL: 'insert_i1' ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 
x i1> poison, i1 undef, i32 7 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, 
i32 32 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'insert_i1' ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> 
poison, i1 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'insert_i1' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 ; AVX512F-NEXT: Cost 
Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; AVX512F-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'insert_i1' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; 
AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> 
poison, i1 undef, i32 31 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 @@ -1199,67 +1199,67 @@ ; ; SLM-LABEL: 'insert_i1' ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = 
insertelement <8 x i1> poison, i1 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 ; 
SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'insert_i1' ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, 
i32 8 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 ; GLM-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; diff --git a/llvm/test/Analysis/CostModel/X86/vector-insert.ll b/llvm/test/Analysis/CostModel/X86/vector-insert.ll --- a/llvm/test/Analysis/CostModel/X86/vector-insert.ll +++ b/llvm/test/Analysis/CostModel/X86/vector-insert.ll @@ -294,71 +294,71 @@ ; ; SSE4-LABEL: 'insert_i64' ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1 ; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = 
insertelement <4 x i64> undef, i64 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3 ; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3 -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7 ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'insert_i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg -; 
AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'insert_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 
for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'insert_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = 
insertelement <2 x i64> undef, i64 undef, i32 1 ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3 ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'insert_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0 ; GLM-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1 ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3 ; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3 -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -382,153 +382,153 @@ define i32 @insert_i32(i32 %arg) { ; SSE2-LABEL: 'insert_i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3 -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3 -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'insert_i32' ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3 ; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = 
insertelement <8 x i32> undef, i32 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3 -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7 ; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3 -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'insert_i32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> undef, 
i32 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, 
i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE4-LABEL: 'insert_i32' ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3 ; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg -; SSE4-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3 -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7 ; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3 -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15 ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'insert_i32' ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x 
i32> undef, i32 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'insert_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 
%arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'insert_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg -; 
SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3 ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7 ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'insert_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3 ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x 
i32> undef, i32 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3 -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7 ; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3 -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -558,76 +558,76 @@ define i32 @insert_i16(i32 %arg) { ; SSE-LABEL: 'insert_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i16_a = insertelement 
<2 x i16> undef, i16 undef, i32 %arg -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 undef, i32 1 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i16_a = insertelement <4 x i16> undef, i16 undef, i32 %arg -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> undef, i16 undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_0 = insertelement <4 x i16> undef, i16 undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> undef, i16 undef, i32 3 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7 ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> undef, i16 undef, i32 7 -; 
SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> undef, i16 undef, i32 15 ; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> undef, i16 undef, i32 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = insertelement <32 x i16> undef, i16 undef, i32 15 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> undef, i16 undef, i32 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = insertelement <32 x i16> undef, i16 undef, i32 24 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_16 = insertelement <32 x i16> undef, i16 undef, i32 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_24 = insertelement <32 x i16> undef, i16 undef, i32 24 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = insertelement <32 x i16> undef, i16 undef, i32 31 ; 
SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'insert_i16' ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i16_a = insertelement <2 x i16> undef, i16 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 undef, i32 1 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i16_a = insertelement <4 x i16> undef, i16 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> undef, i16 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_0 = insertelement <4 x i16> undef, i16 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> undef, i16 undef, i32 3 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> undef, i16 undef, i32 7 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_15 = insertelement <16 x i16> undef, i16 undef, i32 15 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> undef, i16 undef, i32 7 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_15 = insertelement <32 x i16> undef, i16 undef, i32 15 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> undef, i16 undef, i32 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_24 = insertelement <32 x i16> undef, i16 undef, i32 24 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_16 = insertelement <32 x i16> undef, i16 undef, i32 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %v32i16_24 = insertelement <32 x i16> undef, i16 undef, i32 24 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_31 = insertelement <32 x i16> undef, i16 undef, i32 31 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'insert_i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i16_a = insertelement <2 x i16> undef, i16 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 undef, i32 1 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i16_a = insertelement <4 x i16> undef, i16 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> undef, i16 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_0 = insertelement <4 x i16> undef, i16 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> undef, i16 undef, i32 3 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> undef, i16 undef, i32 7 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_15 = insertelement <16 x i16> undef, i16 undef, i32 15 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> undef, i16 undef, i32 7 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_15 = insertelement <32 x i16> undef, i16 undef, i32 15 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_16 = insertelement <32 x i16> undef, i16 undef, i32 16 -; AVX512-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %v32i16_24 = insertelement <32 x i16> undef, i16 undef, i32 24 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16_16 = insertelement <32 x i16> undef, i16 undef, i32 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16_24 = insertelement <32 x i16> undef, i16 undef, i32 24 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_31 = insertelement <32 x i16> undef, i16 undef, i32 31 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -664,265 +664,265 @@ define i32 @insert_i8(i32 %arg) { ; SSE2-LABEL: 'insert_i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found 
an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31 ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: 
%v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE3-LABEL: 'insert_i8' ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 
undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost 
of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31 ; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 
= insertelement <64 x i8> undef, i8 undef, i32 31 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63 ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'insert_i8' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg -; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24 ; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE4-LABEL: 'insert_i8' ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8 ; 
SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15 ; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31 ; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15 ; SSE4-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31 -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32 -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63 ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'insert_i8' ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 
undef, i32 3 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 
for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 
'insert_i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 ; AVX512-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8 ; AVX512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'insert_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = 
insertelement <4 x i8> undef, i8 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15 ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8 ; SLM-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31 ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'insert_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15 ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31 ; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement 
<64 x i8> undef, i8 undef, i32 8 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31 -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32 -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -971,31 +971,31 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2 ; SSE2-NEXT: Cost Model: Found an estimated 
cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = 
insertelement <32 x i1> undef, i1 undef, i32 24 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -1004,31 +1004,31 
@@ ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = 
insertelement <64 x i1> undef, i1 undef, i32 31 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 -; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 ; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63 ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -1037,156 +1037,156 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 ; SSSE3-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg -; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE4-LABEL: 'insert_i1' ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = 
insertelement <2 x i1> undef, i1 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg -; SSE4-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31 ; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31 -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 -; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 +; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 ; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63 ; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'insert_i1' ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 +; AVX-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'insert_i1' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15 ; AVX512F-NEXT: Cost 
Model: Found an estimated cost of 5 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24 ; AVX512F-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'insert_i1' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2 ; AVX512BW-NEXT: Cost Model: Found 
an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15 ; AVX512BW-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15 @@ -1199,67 +1199,67 @@ ; ; SLM-LABEL: 'insert_i1' ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 ; 
SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 
undef, i32 63 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'insert_i1' ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_a = insertelement <2 x i1> undef, i1 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = 
insertelement <16 x i1> undef, i1 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8 ; GLM-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31 -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll @@ -400,7 +400,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> 
%a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift @@ -437,7 +437,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift @@ -486,7 +486,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift @@ -534,35 +534,11 @@ } define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; AVX-LABEL: 'splatvar_shift_v4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; XOP-LABEL: 'splatvar_shift_v4i32' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; AVX512-LABEL: 'splatvar_shift_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = 
shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; CHECK-LABEL: 'splatvar_shift_v4i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %insert = insertelement <4 x i32> undef, i32 %b, i32 0 %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer @@ -572,7 +548,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift @@ -621,7 +597,7 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift @@ -811,7 +787,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift @@ -860,7 +836,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift @@ -915,7 +891,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 
'splatvar_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll @@ -443,17 +443,11 @@ ; define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat -; SSE42-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v2i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v2i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 @@ -492,17 +486,11 @@ } define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 @@ -541,17 +529,11 @@ } define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x 
i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 @@ -590,17 +572,11 @@ } define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v4i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat +; 
SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; AVX-LABEL: 'splatvar_shift_v4i32' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 @@ -627,17 +603,11 @@ } define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v8i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 @@ -676,17 +646,11 @@ } define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v16i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 @@ -879,7 +843,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 
x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift @@ -928,7 +892,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift @@ -995,7 +959,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for 
instruction: %shift = ashr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -443,17 +443,11 @@ ; define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v2i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: ret <2 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v2i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 @@ -492,17 +486,11 @@ } define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 
x i64> undef, i64 %b, i32 0 @@ -541,17 +529,11 @@ } define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 @@ -590,17 +572,11 @@ } define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i32' -; SSE2-NEXT: Cost 
Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v4i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; AVX-LABEL: 'splatvar_shift_v4i32' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 @@ -627,17 +603,11 @@ } define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v8i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 @@ -676,17 +646,11 @@ } define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr 
<16 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v16i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 @@ -879,7 +843,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <16 x i8> %a, %splat ; SSE2-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift @@ -928,7 +892,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift @@ -995,7 +959,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll @@ -408,7 +408,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 
x i64> undef, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <2 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift @@ -445,7 +445,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift @@ -494,7 +494,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift @@ -543,7 +543,7 @@ define <4 x 
i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift @@ -592,7 +592,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift @@ -641,7 +641,7 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an 
estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift @@ -843,7 +843,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift @@ -898,7 +898,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = ashr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift @@ -953,7 +953,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; 
SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = ashr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll @@ -408,7 +408,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <2 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift @@ -445,7 +445,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <4 x i64> %a, %splat ; SSE2-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift @@ -494,7 +494,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = ashr <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift @@ -542,35 +542,11 @@ } define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; AVX-LABEL: 'splatvar_shift_v4i32' -; AVX-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; XOP-LABEL: 'splatvar_shift_v4i32' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; AVX512-LABEL: 'splatvar_shift_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; CHECK-LABEL: 'splatvar_shift_v4i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret <4 x i32> %shift ; %insert = insertelement <4 x i32> undef, i32 %b, i32 0 %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer @@ -580,7 +556,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift @@ -629,7 +605,7 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift @@ -819,7 +795,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = 
insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift @@ -868,7 +844,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift @@ -923,7 +899,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll --- 
a/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll @@ -407,35 +407,11 @@ ; define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; AVX-LABEL: 'splatvar_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; XOP-LABEL: 'splatvar_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; AVX512-LABEL: 'splatvar_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; CHECK-LABEL: 'splatvar_shift_v2i64' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %insert = insertelement <2 x i64> undef, i64 %b, i32 0 %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer @@ -445,7 +421,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x 
i64> undef, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift @@ -494,7 +470,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift @@ -542,35 +518,11 @@ } define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> 
%a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; AVX-LABEL: 'splatvar_shift_v4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; XOP-LABEL: 'splatvar_shift_v4i32' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; AVX512-LABEL: 'splatvar_shift_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; CHECK-LABEL: 'splatvar_shift_v4i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> 
zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %insert = insertelement <4 x i32> undef, i32 %b, i32 0 %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer @@ -580,7 +532,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift @@ -629,7 +581,7 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift @@ -819,7 +771,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 
14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift @@ -862,7 +814,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift @@ -911,7 +863,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x 
i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll @@ -447,17 +447,11 @@ ; define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v2i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 
'splatvar_shift_v2i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 @@ -484,17 +478,11 @@ } define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 @@ -533,17 +521,11 @@ 
} define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 @@ -582,17 +564,11 @@ } define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v4i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; AVX-LABEL: 'splatvar_shift_v4i32' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 @@ -619,17 +595,11 @@ } define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> 
%insert, <8 x i32> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v8i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 @@ -668,17 +638,11 @@ } define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat -; 
SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v16i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 @@ -871,7 +835,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret <16 x i8> %shift @@ -920,7 +884,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift @@ -969,7 +933,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -447,17 +447,11 @@ ; define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, 
i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v2i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatvar_shift_v2i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 @@ -484,17 +478,11 @@ } define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; 
SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 @@ -533,17 +521,11 @@ } define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
ret <8 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 @@ -582,17 +564,11 @@ } define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, 
i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v4i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; AVX-LABEL: 'splatvar_shift_v4i32' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 @@ -619,17 +595,11 @@ } define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; 
SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v8i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 @@ -668,17 +638,11 @@ } define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: ret <16 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v16i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 @@ -871,7 +835,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift @@ -920,7 +884,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: 
Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift @@ -969,7 +933,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll @@ -431,17 +431,11 @@ ; define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v2i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v2i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 @@ -480,17 +474,11 @@ } define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%shift = lshr <4 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 @@ -529,17 +517,11 @@ } define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v8i64' +; SSE-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 @@ -578,17 +560,11 @@ } define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v4i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 @@ -627,17 +603,11 @@ } define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v8i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = 
lshr <8 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 @@ -676,17 +646,11 @@ } define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v16i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 
'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 @@ -879,7 +843,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = lshr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift @@ -934,7 +898,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift @@ -983,7 +947,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost 
Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll @@ -411,35 +411,11 @@ ; define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; AVX-LABEL: 'splatvar_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = 
shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; XOP-LABEL: 'splatvar_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; AVX512-LABEL: 'splatvar_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; CHECK-LABEL: 'splatvar_shift_v2i64' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %insert = insertelement <2 x i64> undef, i64 %b, i32 0 %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer 
@@ -449,7 +425,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift @@ -498,7 +474,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift @@ -546,35 +522,11 @@ } define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat -; SSE2-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; AVX-LABEL: 'splatvar_shift_v4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; XOP-LABEL: 'splatvar_shift_v4i32' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; AVX512-LABEL: 'splatvar_shift_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; CHECK-LABEL: 'splatvar_shift_v4i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %insert = insertelement <4 x i32> undef, i32 %b, i32 0 %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer @@ -584,7 +536,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift @@ -633,7 +585,7 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift @@ -823,7 +775,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift @@ -866,7 +818,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift @@ -915,7 +867,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> 
undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll @@ -407,35 +407,11 @@ ; define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; AVX-LABEL: 'splatvar_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; XOP-LABEL: 'splatvar_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; AVX512-LABEL: 'splatvar_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; CHECK-LABEL: 'splatvar_shift_v2i64' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift 
; %insert = insertelement <2 x i64> undef, i64 %b, i32 0 %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer @@ -445,7 +421,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift @@ -494,7 +470,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift @@ -542,35 +518,11 @@ } define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> 
zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; AVX-LABEL: 'splatvar_shift_v4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; XOP-LABEL: 'splatvar_shift_v4i32' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; AVX512-LABEL: 'splatvar_shift_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; CHECK-LABEL: 'splatvar_shift_v4i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %insert = insertelement <4 x i32> undef, i32 %b, i32 0 %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer @@ -580,7 +532,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift @@ -629,7 +581,7 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift @@ -819,7 +771,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift @@ -868,7 +820,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift @@ -917,7 +869,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; 
SSE2-LABEL: 'splatvar_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll @@ -471,17 +471,11 @@ ; define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat -; SSE42-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v2i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatvar_shift_v2i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 @@ -500,12 +494,6 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift -; -; SLM-LABEL: 'splatvar_shift_v2i64' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %insert = insertelement <2 x i64> poison, i64 %b, i32 0 %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer @@ -514,17 +502,11 @@ } define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 @@ -555,12 +537,6 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x 
i64> %shift -; -; SLM-LABEL: 'splatvar_shift_v4i64' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %insert = insertelement <4 x i64> poison, i64 %b, i32 0 %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer @@ -569,17 +545,11 @@ } define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; SSE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 @@ -610,12 +580,6 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift -; -; SLM-LABEL: 'splatvar_shift_v8i64' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %insert = insertelement <8 x i64> poison, i64 %b, i32 0 %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer @@ -624,17 +588,11 @@ } define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v4i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; AVX-LABEL: 'splatvar_shift_v4i32' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 @@ -653,12 +611,6 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift -; -; SLM-LABEL: 'splatvar_shift_v4i32' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %insert = insertelement <4 x i32> poison, i32 %b, i32 0 %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer @@ -667,17 +619,11 @@ } define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v8i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat +; 
SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 @@ -708,12 +654,6 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift -; -; SLM-LABEL: 'splatvar_shift_v8i32' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %insert = insertelement <8 x i32> poison, i32 %b, i32 0 %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer @@ -722,17 +662,11 @@ } define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v16i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 @@ -763,12 +697,6 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift -; -; SLM-LABEL: 'splatvar_shift_v16i32' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat -; SLM-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %insert = insertelement <16 x i32> poison, i32 %b, i32 0 %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer @@ -949,7 +877,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift @@ -1004,7 +932,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift @@ -1059,7 +987,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -471,17 +471,11 @@ ; define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v2i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatvar_shift_v2i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 @@ -500,12 +494,6 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift -; -; SLM-LABEL: 'splatvar_shift_v2i64' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %insert = insertelement <2 x i64> undef, i64 %b, i32 0 %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer @@ -514,17 +502,11 @@ } define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 
x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 @@ -555,12 +537,6 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift -; -; SLM-LABEL: 'splatvar_shift_v4i64' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 
x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %insert = insertelement <4 x i64> undef, i64 %b, i32 0 %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer @@ -569,17 +545,11 @@ } define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret 
<8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 @@ -610,12 +580,6 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift -; -; SLM-LABEL: 'splatvar_shift_v8i64' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %insert = insertelement <8 x i64> undef, i64 %b, i32 0 %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer @@ -624,17 +588,11 @@ } define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 
0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v4i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; AVX-LABEL: 'splatvar_shift_v4i32' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 @@ -653,12 +611,6 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift -; -; SLM-LABEL: 'splatvar_shift_v4i32' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %insert = insertelement <4 x i32> undef, i32 %b, i32 0 
%splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer @@ -667,17 +619,11 @@ } define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v8i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 @@ -708,12 +654,6 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift -; -; SLM-LABEL: 'splatvar_shift_v8i32' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %insert = insertelement <8 x i32> undef, i32 %b, i32 0 %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer @@ -722,17 +662,11 @@ } define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, 
%splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v16i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 @@ -763,12 +697,6 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift -; -; SLM-LABEL: 'splatvar_shift_v16i32' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %insert = insertelement <16 x i32> undef, i32 %b, i32 0 %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer @@ -949,7 +877,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' -; SSE2-NEXT: Cost Model: Found 
an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift @@ -1004,7 +932,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift @@ -1059,7 +987,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll @@ -427,17 +427,11 @@ ; define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v2i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v2i64' ; AVX1-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 @@ -476,17 +470,11 @@ } define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 @@ -525,17 +513,11 @@ } define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 
%b) { -; SSE2-LABEL: 'splatvar_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 @@ -574,17 +556,11 @@ } define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; 
SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v4i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 @@ -623,17 +599,11 @@ } define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: 
Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v8i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 @@ -672,17 +642,11 @@ } define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> 
%shift -; -; SSE42-LABEL: 'splatvar_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v16i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 @@ -875,7 +839,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift @@ -924,7 +888,7 @@ define <32 x i8> 
@splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift @@ -973,7 +937,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll @@ -475,41 +475,11 @@ ; define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; AVX-LABEL: 'splatvar_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; XOP-LABEL: 'splatvar_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; AVX512-LABEL: 'splatvar_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = 
insertelement <2 x i64> undef, i64 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; SLM-LABEL: 'splatvar_shift_v2i64' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; CHECK-LABEL: 'splatvar_shift_v2i64' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %insert = insertelement <2 x i64> undef, i64 %b, i32 0 %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer @@ -518,17 +488,11 @@ } define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> 
zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 @@ -559,12 +523,6 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift -; -; SLM-LABEL: 'splatvar_shift_v4i64' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; SLM-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %insert = insertelement <4 x i64> undef, i64 %b, i32 0 %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer @@ -573,17 +531,11 @@ } define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { -; SSE2-LABEL: 'splatvar_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; SSE-LABEL: 'splatvar_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: 
%shift = shl <8 x i64> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 @@ -614,12 +566,6 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift -; -; SLM-LABEL: 'splatvar_shift_v8i64' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %insert = insertelement <8 x i64> undef, i64 %b, i32 0 %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer @@ -628,41 +574,11 @@ } define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v4i32' -; SSE42-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; AVX-LABEL: 'splatvar_shift_v4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; XOP-LABEL: 'splatvar_shift_v4i32' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; AVX512-LABEL: 'splatvar_shift_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: ret <4 x i32> %shift -; -; SLM-LABEL: 'splatvar_shift_v4i32' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; CHECK-LABEL: 'splatvar_shift_v4i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %insert = insertelement <4 x i32> undef, i32 %b, i32 0 %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer @@ -671,17 +587,11 @@ } define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, 
i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v8i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 @@ -712,12 +622,6 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift -; -; SLM-LABEL: 'splatvar_shift_v8i32' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %insert = insertelement <8 x i32> undef, i32 
%b, i32 0 %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer @@ -726,17 +630,11 @@ } define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { -; SSE2-LABEL: 'splatvar_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift -; -; SSE42-LABEL: 'splatvar_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; SSE-LABEL: 'splatvar_shift_v16i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 @@ -767,12 +665,6 @@ ; AVX512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift -; -; SLM-LABEL: 'splatvar_shift_v16i32' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %insert = insertelement <16 x i32> undef, i32 %b, i32 0 %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer @@ -941,7 +833,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift @@ -996,7 +888,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift @@ -1051,7 +943,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll @@ -69,11 +69,11 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> poison, double [[CONV]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x double> [[SHUFFLE]], [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[A]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP5]], <4 
x double>* [[TMP6]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> [[TMP4]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x double> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[A]] to <4 x double>* +; CHECK-NEXT: store <4 x double> [[TMP6]], <4 x double>* [[TMP7]], align 8 ; CHECK-NEXT: ret i32 undef ; entry: @@ -196,11 +196,11 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> poison, double [[CONV]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x double> [[SHUFFLE]], [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[A]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[TMP6]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> [[TMP4]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x double> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[A]] to <4 x double>* +; CHECK-NEXT: store <4 x double> [[TMP6]], <4 x double>* [[TMP7]], align 8 ; CHECK-NEXT: ret i32 undef ; entry: @@ -246,22 +246,22 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[CONV]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast 
double* [[A]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[A]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[N]], 4 ; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: ; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A]], i64 2 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[N]], 4 ; CHECK-NEXT: [[CONV12:%.*]] = sitofp i32 [[ADD]] to double -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX7]] to <2 x double>* -; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP2]], double [[CONV12]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[ARRAYIDX7]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[ARRAYIDX7]] to <2 x double>* +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP2]], double [[CONV12]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast double* [[ARRAYIDX7]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8 ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: ; CHECK-NEXT: ret i32 0 @@ -352,18 +352,20 @@ ; CHECK-LABEL: @cse_for_hoisted_instructions_in_preheader( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] 
= shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> , [[TMP1]] ; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[GEP_0]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP2]], <2 x i32>* [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[GEP_0]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[OR_2:%.*]] = or i32 [[A]], 3 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 10 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[GEP_2]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 4 +; CHECK-NEXT: store i32 [[OR_2]], i32* [[GEP_2]], align 4 +; CHECK-NEXT: [[OR_3:%.*]] = or i32 [[A]], 3 +; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 11 +; CHECK-NEXT: store i32 [[OR_3]], i32* [[GEP_3]], align 4 ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll @@ -11,11 +11,23 @@ ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ undef, [[BB1]] ], [ undef, [[BB:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[BB1]] ], [ undef, [[BB]] ] -; CHECK-NEXT: [[TMP0:%.*]] = 
insertelement <16 x i32> poison, i32 [[TMP]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[SHUFFLE]]) -; CHECK-NEXT: [[OP_RDX]] = mul i32 [[TMP1]], undef +; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP18:%.*]], [[BB1]] ], [ undef, [[BB]] ] +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 undef, [[TMP]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], [[TMP]] +; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], [[TMP]] +; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP]] +; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], [[TMP]] +; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP]] +; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], [[TMP]] +; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP]] +; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP]] +; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP]] +; CHECK-NEXT: [[TMP13:%.*]] = mul i32 [[TMP12]], [[TMP]] +; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], [[TMP]] +; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], [[TMP]] +; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], [[TMP]] +; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[TMP16]], [[TMP]] +; CHECK-NEXT: [[TMP18]] = mul i32 [[TMP17]], [[TMP]] ; CHECK-NEXT: br label [[BB1]] ; bb: @@ -51,10 +63,10 @@ ; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ undef, [[BB:%.*]] ], [ undef, [[BB2]] ] ; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 0, [[BB]] ], [ undef, [[BB2]] ] ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[TMP]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[SHUFFLE]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0]], 
<8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP1]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP2]], [[TMP3]] ; CHECK-NEXT: [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], undef ; CHECK-NEXT: call void @use(i32 [[OP_RDX1]]) ; CHECK-NEXT: br label [[BB2]] @@ -96,66 +108,56 @@ ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ undef, [[BB1]] ], [ poison, [[BB2:%.*]] ] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> poison, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[TMP1]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[TMP1]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[TMP1]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[TMP1]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[TMP1]], i32 6 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[TMP1]], i32 7 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[TMP1]], i32 8 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[TMP1]], i32 9 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 [[TMP1]], i32 10 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[TMP1]], i32 11 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 [[TMP1]], i32 12 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 [[TMP1]], i32 13 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x i32> 
[[TMP15]], i32 [[TMP1]], i32 14 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x i32> [[TMP16]], i32 [[TMP1]], i32 15 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <32 x i32> poison, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <32 x i32> [[TMP18]], i32 [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <32 x i32> [[TMP19]], i32 [[TMP1]], i32 2 -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <32 x i32> [[TMP20]], i32 [[TMP1]], i32 3 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <32 x i32> [[TMP21]], i32 [[TMP1]], i32 4 -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <32 x i32> [[TMP22]], i32 [[TMP1]], i32 5 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <32 x i32> [[TMP23]], i32 [[TMP1]], i32 6 -; CHECK-NEXT: [[TMP25:%.*]] = insertelement <32 x i32> [[TMP24]], i32 [[TMP1]], i32 7 -; CHECK-NEXT: [[TMP26:%.*]] = insertelement <32 x i32> [[TMP25]], i32 [[TMP1]], i32 8 -; CHECK-NEXT: [[TMP27:%.*]] = insertelement <32 x i32> [[TMP26]], i32 [[TMP1]], i32 9 -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <32 x i32> [[TMP27]], i32 [[TMP1]], i32 10 -; CHECK-NEXT: [[TMP29:%.*]] = insertelement <32 x i32> [[TMP28]], i32 [[TMP1]], i32 11 -; CHECK-NEXT: [[TMP30:%.*]] = insertelement <32 x i32> [[TMP29]], i32 [[TMP1]], i32 12 -; CHECK-NEXT: [[TMP31:%.*]] = insertelement <32 x i32> [[TMP30]], i32 [[TMP1]], i32 13 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <32 x i32> [[TMP31]], i32 [[TMP1]], i32 14 -; CHECK-NEXT: [[TMP33:%.*]] = insertelement <32 x i32> [[TMP32]], i32 [[TMP1]], i32 15 -; CHECK-NEXT: [[TMP34:%.*]] = insertelement <32 x i32> [[TMP33]], i32 [[TMP1]], i32 16 -; CHECK-NEXT: [[TMP35:%.*]] = insertelement <32 x i32> [[TMP34]], i32 [[TMP1]], i32 17 -; CHECK-NEXT: [[TMP36:%.*]] = insertelement <32 x i32> [[TMP35]], i32 [[TMP1]], i32 18 -; CHECK-NEXT: [[TMP37:%.*]] = insertelement <32 x i32> [[TMP36]], i32 [[TMP1]], i32 19 -; CHECK-NEXT: [[TMP38:%.*]] = insertelement <32 x i32> [[TMP37]], i32 [[TMP1]], i32 20 -; CHECK-NEXT: [[TMP39:%.*]] = insertelement 
<32 x i32> [[TMP38]], i32 [[TMP1]], i32 21 -; CHECK-NEXT: [[TMP40:%.*]] = insertelement <32 x i32> [[TMP39]], i32 [[TMP1]], i32 22 -; CHECK-NEXT: [[TMP41:%.*]] = insertelement <32 x i32> [[TMP40]], i32 [[TMP1]], i32 23 -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <32 x i32> [[TMP41]], i32 [[TMP1]], i32 24 -; CHECK-NEXT: [[TMP43:%.*]] = insertelement <32 x i32> [[TMP42]], i32 [[TMP1]], i32 25 -; CHECK-NEXT: [[TMP44:%.*]] = insertelement <32 x i32> [[TMP43]], i32 [[TMP1]], i32 26 -; CHECK-NEXT: [[TMP45:%.*]] = insertelement <32 x i32> [[TMP44]], i32 [[TMP1]], i32 27 -; CHECK-NEXT: [[TMP46:%.*]] = insertelement <32 x i32> [[TMP45]], i32 [[TMP1]], i32 28 -; CHECK-NEXT: [[TMP47:%.*]] = insertelement <32 x i32> [[TMP46]], i32 [[TMP1]], i32 29 -; CHECK-NEXT: [[TMP48:%.*]] = insertelement <32 x i32> [[TMP47]], i32 [[TMP1]], i32 30 -; CHECK-NEXT: [[TMP49:%.*]] = insertelement <32 x i32> [[TMP48]], i32 [[TMP1]], i32 31 -; CHECK-NEXT: [[TMP50:%.*]] = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> [[TMP49]]) -; CHECK-NEXT: [[TMP51:%.*]] = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[TMP17]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP50]], [[TMP51]] -; CHECK-NEXT: [[TMP52:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[SHUFFLE]]) -; CHECK-NEXT: [[OP_RDX1:%.*]] = mul i32 [[OP_RDX]], [[TMP52]] -; CHECK-NEXT: [[OP_RDX2:%.*]] = mul i32 [[OP_RDX1]], [[TMP1]] -; CHECK-NEXT: [[OP_RDX3:%.*]] = mul i32 [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[OP_RDX4:%.*]] = mul i32 [[OP_RDX2]], [[OP_RDX3]] -; CHECK-NEXT: [[OP_RDX5:%.*]] = mul i32 [[OP_RDX4]], [[TMP1]] -; CHECK-NEXT: [[VAL64:%.*]] = add i32 undef, [[OP_RDX5]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <32 x i32> poison, i32 [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <32 x i32> [[TMP3]], i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = 
insertelement <32 x i32> [[TMP4]], i32 [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <32 x i32> [[TMP5]], i32 [[TMP2]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <32 x i32> [[TMP6]], i32 [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <32 x i32> [[TMP7]], i32 [[TMP2]], i32 5 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <32 x i32> [[TMP8]], i32 [[TMP2]], i32 6 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <32 x i32> [[TMP9]], i32 [[TMP2]], i32 7 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <32 x i32> [[TMP10]], i32 [[TMP2]], i32 8 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <32 x i32> [[TMP11]], i32 [[TMP2]], i32 9 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <32 x i32> [[TMP12]], i32 [[TMP2]], i32 10 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <32 x i32> [[TMP13]], i32 [[TMP2]], i32 11 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <32 x i32> [[TMP14]], i32 [[TMP2]], i32 12 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <32 x i32> [[TMP15]], i32 [[TMP2]], i32 13 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <32 x i32> [[TMP16]], i32 [[TMP2]], i32 14 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <32 x i32> [[TMP17]], i32 [[TMP2]], i32 15 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <32 x i32> [[TMP18]], i32 [[TMP2]], i32 16 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <32 x i32> [[TMP19]], i32 [[TMP2]], i32 17 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <32 x i32> [[TMP20]], i32 [[TMP2]], i32 18 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <32 x i32> [[TMP21]], i32 [[TMP2]], i32 19 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <32 x i32> [[TMP22]], i32 [[TMP2]], i32 20 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <32 x i32> [[TMP23]], i32 [[TMP2]], i32 21 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <32 x i32> [[TMP24]], i32 [[TMP2]], i32 22 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <32 x i32> [[TMP25]], i32 [[TMP2]], i32 23 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <32 x i32> [[TMP26]], i32 [[TMP2]], i32 24 +; CHECK-NEXT: 
[[TMP28:%.*]] = insertelement <32 x i32> [[TMP27]], i32 [[TMP2]], i32 25 +; CHECK-NEXT: [[TMP29:%.*]] = insertelement <32 x i32> [[TMP28]], i32 [[TMP2]], i32 26 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <32 x i32> [[TMP29]], i32 [[TMP2]], i32 27 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <32 x i32> [[TMP30]], i32 [[TMP2]], i32 28 +; CHECK-NEXT: [[TMP32:%.*]] = insertelement <32 x i32> [[TMP31]], i32 [[TMP2]], i32 29 +; CHECK-NEXT: [[TMP33:%.*]] = insertelement <32 x i32> [[TMP32]], i32 [[TMP2]], i32 30 +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <32 x i32> [[TMP33]], i32 [[TMP2]], i32 31 +; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> [[TMP34]]) +; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[TMP1]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = mul i32 [[OP_RDX]], [[TMP2]] +; CHECK-NEXT: [[OP_RDX2:%.*]] = mul i32 [[TMP2]], [[TMP2]] +; CHECK-NEXT: [[OP_RDX3:%.*]] = mul i32 [[TMP2]], [[TMP2]] +; CHECK-NEXT: [[OP_RDX4:%.*]] = mul i32 [[TMP2]], [[TMP2]] +; CHECK-NEXT: [[OP_RDX5:%.*]] = mul i32 [[TMP2]], [[TMP2]] +; CHECK-NEXT: [[OP_RDX6:%.*]] = mul i32 [[TMP2]], [[TMP2]] +; CHECK-NEXT: [[OP_RDX7:%.*]] = mul i32 [[OP_RDX1]], [[OP_RDX2]] +; CHECK-NEXT: [[OP_RDX8:%.*]] = mul i32 [[OP_RDX3]], [[OP_RDX4]] +; CHECK-NEXT: [[OP_RDX9:%.*]] = mul i32 [[OP_RDX5]], [[OP_RDX6]] +; CHECK-NEXT: [[OP_RDX10:%.*]] = mul i32 [[OP_RDX7]], [[OP_RDX8]] +; CHECK-NEXT: [[OP_RDX11:%.*]] = mul i32 [[OP_RDX9]], [[TMP2]] +; CHECK-NEXT: [[OP_RDX12:%.*]] = mul i32 [[OP_RDX10]], [[OP_RDX11]] +; CHECK-NEXT: [[VAL64:%.*]] = add i32 undef, [[OP_RDX12]] ; CHECK-NEXT: [[VAL65:%.*]] = sext i32 [[VAL64]] to i64 ; CHECK-NEXT: ret i64 [[VAL65]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll @@ -21,222 +21,233 @@ ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 3, i64 1), align 4 ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 3, i64 2), align 8 ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 3, i64 3), align 4 -; CHECK-NEXT: [[TMP13:%.*]] = load <16 x i32>, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 4, i64 0), align 16 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 4, i64 0), align 16 +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 4, i64 1), align 4 +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 4, i64 2), align 8 +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 4, i64 3), align 4 +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 5, i64 0), align 16 +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 5, i64 1), align 4 +; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 5, i64 2), align 8 +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 5, i64 3), align 4 +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 6, i64 0), align 16 +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 6, i64 1), align 4 +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 6, i64 2), align 8 +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr 
getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 6, i64 3), align 4 +; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 7, i64 0), align 16 +; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 7, i64 1), align 4 +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 7, i64 2), align 8 +; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 7, i64 3), align 4 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[B_0:%.*]] = phi i32 [ [[SPEC_SELECT8_3_7:%.*]], [[FOR_COND]] ], [ undef, [[ENTRY]] ] -; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], -183 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP16]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = sub <4 x i32> [[SHUFFLE]], [[TMP0]] -; CHECK-NEXT: [[TMP18:%.*]] = icmp slt <4 x i32> [[TMP17]], zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP18]], <4 x i32> [[TMP19]], <4 x i32> [[TMP17]] -; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP20]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = icmp slt i32 [[TMP21]], [[B_0]] -; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP21]], i32 [[B_0]] -; CHECK-NEXT: [[SUB_116:%.*]] = sub i32 [[TMP15]], [[TMP1]] -; CHECK-NEXT: [[TMP22:%.*]] = icmp slt i32 [[SUB_116]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], -183 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 
x i32> poison, i32 [[TMP30]], i32 0 +; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i32> [[TMP31]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP33:%.*]] = sub <4 x i32> [[TMP32]], [[TMP0]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp slt <4 x i32> [[TMP33]], zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP33]] +; CHECK-NEXT: [[TMP36:%.*]] = select <4 x i1> [[TMP34]], <4 x i32> [[TMP35]], <4 x i32> [[TMP33]] +; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP36]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = icmp slt i32 [[TMP37]], [[B_0]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP37]], i32 [[B_0]] +; CHECK-NEXT: [[SUB_116:%.*]] = sub i32 [[TMP30]], [[TMP1]] +; CHECK-NEXT: [[TMP38:%.*]] = icmp slt i32 [[SUB_116]], 0 ; CHECK-NEXT: [[NEG_117:%.*]] = sub nsw i32 0, [[SUB_116]] -; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[NEG_117]], i32 [[SUB_116]] -; CHECK-NEXT: [[CMP12_118:%.*]] = icmp slt i32 [[TMP23]], [[OP_RDX1]] -; CHECK-NEXT: [[SPEC_SELECT8_120:%.*]] = select i1 [[CMP12_118]], i32 [[TMP23]], i32 [[OP_RDX1]] -; CHECK-NEXT: [[SUB_1_1:%.*]] = sub i32 [[TMP15]], [[TMP2]] -; CHECK-NEXT: [[TMP24:%.*]] = icmp slt i32 [[SUB_1_1]], 0 +; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[NEG_117]], i32 [[SUB_116]] +; CHECK-NEXT: [[CMP12_118:%.*]] = icmp slt i32 [[TMP39]], [[OP_RDX1]] +; CHECK-NEXT: [[SPEC_SELECT8_120:%.*]] = select i1 [[CMP12_118]], i32 [[TMP39]], i32 [[OP_RDX1]] +; CHECK-NEXT: [[SUB_1_1:%.*]] = sub i32 [[TMP30]], [[TMP2]] +; CHECK-NEXT: [[TMP40:%.*]] = icmp slt i32 [[SUB_1_1]], 0 ; CHECK-NEXT: [[NEG_1_1:%.*]] = sub nsw i32 0, [[SUB_1_1]] -; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[NEG_1_1]], i32 [[SUB_1_1]] -; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP25]], [[SPEC_SELECT8_120]] +; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i32 [[NEG_1_1]], i32 [[SUB_1_1]] +; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP41]], 
[[SPEC_SELECT8_120]] ; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CMP12_1_1]], [[CMP12_118]] -; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP25]], i32 [[SPEC_SELECT8_120]] -; CHECK-NEXT: [[SUB_2_1:%.*]] = sub i32 [[TMP15]], [[TMP3]] -; CHECK-NEXT: [[TMP26:%.*]] = icmp slt i32 [[SUB_2_1]], 0 +; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP41]], i32 [[SPEC_SELECT8_120]] +; CHECK-NEXT: [[SUB_2_1:%.*]] = sub i32 [[TMP30]], [[TMP3]] +; CHECK-NEXT: [[TMP42:%.*]] = icmp slt i32 [[SUB_2_1]], 0 ; CHECK-NEXT: [[NEG_2_1:%.*]] = sub nsw i32 0, [[SUB_2_1]] -; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[NEG_2_1]], i32 [[SUB_2_1]] -; CHECK-NEXT: [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP27]], [[SPEC_SELECT8_1_1]] +; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[NEG_2_1]], i32 [[SUB_2_1]] +; CHECK-NEXT: [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP43]], [[SPEC_SELECT8_1_1]] ; CHECK-NEXT: [[NARROW34:%.*]] = or i1 [[CMP12_2_1]], [[NARROW]] -; CHECK-NEXT: [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP27]], i32 [[SPEC_SELECT8_1_1]] -; CHECK-NEXT: [[SUB_3_1:%.*]] = sub i32 [[TMP15]], [[TMP4]] -; CHECK-NEXT: [[TMP28:%.*]] = icmp slt i32 [[SUB_3_1]], 0 +; CHECK-NEXT: [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP43]], i32 [[SPEC_SELECT8_1_1]] +; CHECK-NEXT: [[SUB_3_1:%.*]] = sub i32 [[TMP30]], [[TMP4]] +; CHECK-NEXT: [[TMP44:%.*]] = icmp slt i32 [[SUB_3_1]], 0 ; CHECK-NEXT: [[NEG_3_1:%.*]] = sub nsw i32 0, [[SUB_3_1]] -; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[NEG_3_1]], i32 [[SUB_3_1]] -; CHECK-NEXT: [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP29]], [[SPEC_SELECT8_2_1]] +; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[NEG_3_1]], i32 [[SUB_3_1]] +; CHECK-NEXT: [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP45]], [[SPEC_SELECT8_2_1]] ; CHECK-NEXT: [[NARROW35:%.*]] = or i1 [[CMP12_3_1]], [[NARROW34]] ; CHECK-NEXT: [[SPEC_SELECT_3_1:%.*]] = zext i1 [[NARROW35]] to i32 -; CHECK-NEXT: 
[[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP29]], i32 [[SPEC_SELECT8_2_1]] -; CHECK-NEXT: [[SUB_222:%.*]] = sub i32 [[TMP15]], [[TMP5]] -; CHECK-NEXT: [[TMP30:%.*]] = icmp slt i32 [[SUB_222]], 0 +; CHECK-NEXT: [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP45]], i32 [[SPEC_SELECT8_2_1]] +; CHECK-NEXT: [[SUB_222:%.*]] = sub i32 [[TMP30]], [[TMP5]] +; CHECK-NEXT: [[TMP46:%.*]] = icmp slt i32 [[SUB_222]], 0 ; CHECK-NEXT: [[NEG_223:%.*]] = sub nsw i32 0, [[SUB_222]] -; CHECK-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[NEG_223]], i32 [[SUB_222]] -; CHECK-NEXT: [[CMP12_224:%.*]] = icmp slt i32 [[TMP31]], [[SPEC_SELECT8_3_1]] -; CHECK-NEXT: [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP31]], i32 [[SPEC_SELECT8_3_1]] -; CHECK-NEXT: [[SUB_1_2:%.*]] = sub i32 [[TMP15]], [[TMP6]] -; CHECK-NEXT: [[TMP32:%.*]] = icmp slt i32 [[SUB_1_2]], 0 +; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], i32 [[NEG_223]], i32 [[SUB_222]] +; CHECK-NEXT: [[CMP12_224:%.*]] = icmp slt i32 [[TMP47]], [[SPEC_SELECT8_3_1]] +; CHECK-NEXT: [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP47]], i32 [[SPEC_SELECT8_3_1]] +; CHECK-NEXT: [[SUB_1_2:%.*]] = sub i32 [[TMP30]], [[TMP6]] +; CHECK-NEXT: [[TMP48:%.*]] = icmp slt i32 [[SUB_1_2]], 0 ; CHECK-NEXT: [[NEG_1_2:%.*]] = sub nsw i32 0, [[SUB_1_2]] -; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[NEG_1_2]], i32 [[SUB_1_2]] -; CHECK-NEXT: [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP33]], [[SPEC_SELECT8_226]] -; CHECK-NEXT: [[TMP34:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]] -; CHECK-NEXT: [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP33]], i32 [[SPEC_SELECT8_226]] -; CHECK-NEXT: [[SUB_2_2:%.*]] = sub i32 [[TMP15]], [[TMP7]] -; CHECK-NEXT: [[TMP35:%.*]] = icmp slt i32 [[SUB_2_2]], 0 +; CHECK-NEXT: [[TMP49:%.*]] = select i1 [[TMP48]], i32 [[NEG_1_2]], i32 [[SUB_1_2]] +; CHECK-NEXT: [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP49]], [[SPEC_SELECT8_226]] +; CHECK-NEXT: [[TMP50:%.*]] = or 
i1 [[CMP12_1_2]], [[CMP12_224]] +; CHECK-NEXT: [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP49]], i32 [[SPEC_SELECT8_226]] +; CHECK-NEXT: [[SUB_2_2:%.*]] = sub i32 [[TMP30]], [[TMP7]] +; CHECK-NEXT: [[TMP51:%.*]] = icmp slt i32 [[SUB_2_2]], 0 ; CHECK-NEXT: [[NEG_2_2:%.*]] = sub nsw i32 0, [[SUB_2_2]] -; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[NEG_2_2]], i32 [[SUB_2_2]] -; CHECK-NEXT: [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP36]], [[SPEC_SELECT8_1_2]] -; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[CMP12_2_2]], [[TMP34]] -; CHECK-NEXT: [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP36]], i32 [[SPEC_SELECT8_1_2]] -; CHECK-NEXT: [[SUB_3_2:%.*]] = sub i32 [[TMP15]], [[TMP8]] -; CHECK-NEXT: [[TMP38:%.*]] = icmp slt i32 [[SUB_3_2]], 0 +; CHECK-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[NEG_2_2]], i32 [[SUB_2_2]] +; CHECK-NEXT: [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP52]], [[SPEC_SELECT8_1_2]] +; CHECK-NEXT: [[TMP53:%.*]] = or i1 [[CMP12_2_2]], [[TMP50]] +; CHECK-NEXT: [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP52]], i32 [[SPEC_SELECT8_1_2]] +; CHECK-NEXT: [[SUB_3_2:%.*]] = sub i32 [[TMP30]], [[TMP8]] +; CHECK-NEXT: [[TMP54:%.*]] = icmp slt i32 [[SUB_3_2]], 0 ; CHECK-NEXT: [[NEG_3_2:%.*]] = sub nsw i32 0, [[SUB_3_2]] -; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[NEG_3_2]], i32 [[SUB_3_2]] -; CHECK-NEXT: [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP39]], [[SPEC_SELECT8_2_2]] -; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[CMP12_3_2]], [[TMP37]] -; CHECK-NEXT: [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP40]], i32 2, i32 [[SPEC_SELECT_3_1]] -; CHECK-NEXT: [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP39]], i32 [[SPEC_SELECT8_2_2]] -; CHECK-NEXT: [[SUB_328:%.*]] = sub i32 [[TMP15]], [[TMP9]] -; CHECK-NEXT: [[TMP41:%.*]] = icmp slt i32 [[SUB_328]], 0 +; CHECK-NEXT: [[TMP55:%.*]] = select i1 [[TMP54]], i32 [[NEG_3_2]], i32 [[SUB_3_2]] +; CHECK-NEXT: [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP55]], [[SPEC_SELECT8_2_2]] +; 
CHECK-NEXT: [[TMP56:%.*]] = or i1 [[CMP12_3_2]], [[TMP53]] +; CHECK-NEXT: [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP56]], i32 2, i32 [[SPEC_SELECT_3_1]] +; CHECK-NEXT: [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP55]], i32 [[SPEC_SELECT8_2_2]] +; CHECK-NEXT: [[SUB_328:%.*]] = sub i32 [[TMP30]], [[TMP9]] +; CHECK-NEXT: [[TMP57:%.*]] = icmp slt i32 [[SUB_328]], 0 ; CHECK-NEXT: [[NEG_329:%.*]] = sub nsw i32 0, [[SUB_328]] -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[NEG_329]], i32 [[SUB_328]] -; CHECK-NEXT: [[CMP12_330:%.*]] = icmp slt i32 [[TMP42]], [[SPEC_SELECT8_3_2]] -; CHECK-NEXT: [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP42]], i32 [[SPEC_SELECT8_3_2]] -; CHECK-NEXT: [[SUB_1_3:%.*]] = sub i32 [[TMP15]], [[TMP10]] -; CHECK-NEXT: [[TMP43:%.*]] = icmp slt i32 [[SUB_1_3]], 0 +; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[NEG_329]], i32 [[SUB_328]] +; CHECK-NEXT: [[CMP12_330:%.*]] = icmp slt i32 [[TMP58]], [[SPEC_SELECT8_3_2]] +; CHECK-NEXT: [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP58]], i32 [[SPEC_SELECT8_3_2]] +; CHECK-NEXT: [[SUB_1_3:%.*]] = sub i32 [[TMP30]], [[TMP10]] +; CHECK-NEXT: [[TMP59:%.*]] = icmp slt i32 [[SUB_1_3]], 0 ; CHECK-NEXT: [[NEG_1_3:%.*]] = sub nsw i32 0, [[SUB_1_3]] -; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[NEG_1_3]], i32 [[SUB_1_3]] -; CHECK-NEXT: [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP44]], [[SPEC_SELECT8_332]] -; CHECK-NEXT: [[TMP45:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]] -; CHECK-NEXT: [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP44]], i32 [[SPEC_SELECT8_332]] -; CHECK-NEXT: [[SUB_2_3:%.*]] = sub i32 [[TMP15]], [[TMP11]] -; CHECK-NEXT: [[TMP46:%.*]] = icmp slt i32 [[SUB_2_3]], 0 +; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[NEG_1_3]], i32 [[SUB_1_3]] +; CHECK-NEXT: [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP60]], [[SPEC_SELECT8_332]] +; CHECK-NEXT: [[TMP61:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]] +; CHECK-NEXT: 
[[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP60]], i32 [[SPEC_SELECT8_332]] +; CHECK-NEXT: [[SUB_2_3:%.*]] = sub i32 [[TMP30]], [[TMP11]] +; CHECK-NEXT: [[TMP62:%.*]] = icmp slt i32 [[SUB_2_3]], 0 ; CHECK-NEXT: [[NEG_2_3:%.*]] = sub nsw i32 0, [[SUB_2_3]] -; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], i32 [[NEG_2_3]], i32 [[SUB_2_3]] -; CHECK-NEXT: [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP47]], [[SPEC_SELECT8_1_3]] -; CHECK-NEXT: [[TMP48:%.*]] = or i1 [[CMP12_2_3]], [[TMP45]] -; CHECK-NEXT: [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP47]], i32 [[SPEC_SELECT8_1_3]] -; CHECK-NEXT: [[SUB_3_3:%.*]] = sub i32 [[TMP15]], [[TMP12]] -; CHECK-NEXT: [[TMP49:%.*]] = icmp slt i32 [[SUB_3_3]], 0 +; CHECK-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], i32 [[NEG_2_3]], i32 [[SUB_2_3]] +; CHECK-NEXT: [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP63]], [[SPEC_SELECT8_1_3]] +; CHECK-NEXT: [[TMP64:%.*]] = or i1 [[CMP12_2_3]], [[TMP61]] +; CHECK-NEXT: [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP63]], i32 [[SPEC_SELECT8_1_3]] +; CHECK-NEXT: [[SUB_3_3:%.*]] = sub i32 [[TMP30]], [[TMP12]] +; CHECK-NEXT: [[TMP65:%.*]] = icmp slt i32 [[SUB_3_3]], 0 ; CHECK-NEXT: [[NEG_3_3:%.*]] = sub nsw i32 0, [[SUB_3_3]] -; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[NEG_3_3]], i32 [[SUB_3_3]] -; CHECK-NEXT: [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP50]], [[SPEC_SELECT8_2_3]] -; CHECK-NEXT: [[TMP51:%.*]] = or i1 [[CMP12_3_3]], [[TMP48]] -; CHECK-NEXT: [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP51]], i32 3, i32 [[SPEC_SELECT_3_2]] -; CHECK-NEXT: [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP50]], i32 [[SPEC_SELECT8_2_3]] -; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x i32> poison, i32 [[TMP15]], i32 0 -; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i32> [[TMP52]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP53:%.*]] = sub <16 x i32> [[SHUFFLE2]], [[TMP13]] -; CHECK-NEXT: [[TMP54:%.*]] = extractelement <16 x i32> 
[[TMP53]], i32 0 -; CHECK-NEXT: [[NEG_4:%.*]] = sub nsw i32 0, [[TMP54]] -; CHECK-NEXT: [[TMP55:%.*]] = icmp slt <16 x i32> [[TMP53]], zeroinitializer -; CHECK-NEXT: [[TMP56:%.*]] = extractelement <16 x i1> [[TMP55]], i32 0 -; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[NEG_4]], i32 [[TMP54]] -; CHECK-NEXT: [[CMP12_4:%.*]] = icmp slt i32 [[TMP57]], [[SPEC_SELECT8_3_3]] -; CHECK-NEXT: [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP57]], i32 [[SPEC_SELECT8_3_3]] -; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i32> [[TMP53]], i32 1 -; CHECK-NEXT: [[NEG_1_4:%.*]] = sub nsw i32 0, [[TMP58]] -; CHECK-NEXT: [[TMP59:%.*]] = extractelement <16 x i1> [[TMP55]], i32 1 -; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[NEG_1_4]], i32 [[TMP58]] -; CHECK-NEXT: [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP60]], [[SPEC_SELECT8_4]] -; CHECK-NEXT: [[TMP61:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]] -; CHECK-NEXT: [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP60]], i32 [[SPEC_SELECT8_4]] -; CHECK-NEXT: [[TMP62:%.*]] = extractelement <16 x i32> [[TMP53]], i32 2 -; CHECK-NEXT: [[NEG_2_4:%.*]] = sub nsw i32 0, [[TMP62]] -; CHECK-NEXT: [[TMP63:%.*]] = extractelement <16 x i1> [[TMP55]], i32 2 -; CHECK-NEXT: [[TMP64:%.*]] = select i1 [[TMP63]], i32 [[NEG_2_4]], i32 [[TMP62]] -; CHECK-NEXT: [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP64]], [[SPEC_SELECT8_1_4]] -; CHECK-NEXT: [[TMP65:%.*]] = or i1 [[CMP12_2_4]], [[TMP61]] -; CHECK-NEXT: [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP64]], i32 [[SPEC_SELECT8_1_4]] -; CHECK-NEXT: [[TMP66:%.*]] = extractelement <16 x i32> [[TMP53]], i32 3 -; CHECK-NEXT: [[NEG_3_4:%.*]] = sub nsw i32 0, [[TMP66]] -; CHECK-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[TMP55]], i32 3 -; CHECK-NEXT: [[TMP68:%.*]] = select i1 [[TMP67]], i32 [[NEG_3_4]], i32 [[TMP66]] -; CHECK-NEXT: [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP68]], [[SPEC_SELECT8_2_4]] -; CHECK-NEXT: [[TMP69:%.*]] = or i1 [[CMP12_3_4]], [[TMP65]] -; 
CHECK-NEXT: [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP69]], i32 4, i32 [[SPEC_SELECT_3_3]] -; CHECK-NEXT: [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP68]], i32 [[SPEC_SELECT8_2_4]] -; CHECK-NEXT: [[TMP70:%.*]] = extractelement <16 x i32> [[TMP53]], i32 4 -; CHECK-NEXT: [[NEG_5:%.*]] = sub nsw i32 0, [[TMP70]] -; CHECK-NEXT: [[TMP71:%.*]] = extractelement <16 x i1> [[TMP55]], i32 4 -; CHECK-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[NEG_5]], i32 [[TMP70]] -; CHECK-NEXT: [[CMP12_5:%.*]] = icmp slt i32 [[TMP72]], [[SPEC_SELECT8_3_4]] -; CHECK-NEXT: [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP72]], i32 [[SPEC_SELECT8_3_4]] -; CHECK-NEXT: [[TMP73:%.*]] = extractelement <16 x i32> [[TMP53]], i32 5 -; CHECK-NEXT: [[NEG_1_5:%.*]] = sub nsw i32 0, [[TMP73]] -; CHECK-NEXT: [[TMP74:%.*]] = extractelement <16 x i1> [[TMP55]], i32 5 -; CHECK-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[NEG_1_5]], i32 [[TMP73]] -; CHECK-NEXT: [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP75]], [[SPEC_SELECT8_5]] -; CHECK-NEXT: [[TMP76:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]] -; CHECK-NEXT: [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP75]], i32 [[SPEC_SELECT8_5]] -; CHECK-NEXT: [[TMP77:%.*]] = extractelement <16 x i32> [[TMP53]], i32 6 -; CHECK-NEXT: [[NEG_2_5:%.*]] = sub nsw i32 0, [[TMP77]] -; CHECK-NEXT: [[TMP78:%.*]] = extractelement <16 x i1> [[TMP55]], i32 6 -; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 [[NEG_2_5]], i32 [[TMP77]] -; CHECK-NEXT: [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP79]], [[SPEC_SELECT8_1_5]] -; CHECK-NEXT: [[TMP80:%.*]] = or i1 [[CMP12_2_5]], [[TMP76]] -; CHECK-NEXT: [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP79]], i32 [[SPEC_SELECT8_1_5]] -; CHECK-NEXT: [[TMP81:%.*]] = extractelement <16 x i32> [[TMP53]], i32 7 -; CHECK-NEXT: [[NEG_3_5:%.*]] = sub nsw i32 0, [[TMP81]] -; CHECK-NEXT: [[TMP82:%.*]] = extractelement <16 x i1> [[TMP55]], i32 7 -; CHECK-NEXT: [[TMP83:%.*]] = select i1 [[TMP82]], i32 
[[NEG_3_5]], i32 [[TMP81]] -; CHECK-NEXT: [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP83]], [[SPEC_SELECT8_2_5]] -; CHECK-NEXT: [[TMP84:%.*]] = or i1 [[CMP12_3_5]], [[TMP80]] -; CHECK-NEXT: [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP84]], i32 5, i32 [[SPEC_SELECT_3_4]] -; CHECK-NEXT: [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP83]], i32 [[SPEC_SELECT8_2_5]] -; CHECK-NEXT: [[TMP85:%.*]] = extractelement <16 x i32> [[TMP53]], i32 8 -; CHECK-NEXT: [[NEG_6:%.*]] = sub nsw i32 0, [[TMP85]] -; CHECK-NEXT: [[TMP86:%.*]] = extractelement <16 x i1> [[TMP55]], i32 8 -; CHECK-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[NEG_6]], i32 [[TMP85]] -; CHECK-NEXT: [[CMP12_6:%.*]] = icmp slt i32 [[TMP87]], [[SPEC_SELECT8_3_5]] -; CHECK-NEXT: [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP87]], i32 [[SPEC_SELECT8_3_5]] -; CHECK-NEXT: [[TMP88:%.*]] = extractelement <16 x i32> [[TMP53]], i32 9 -; CHECK-NEXT: [[NEG_1_6:%.*]] = sub nsw i32 0, [[TMP88]] -; CHECK-NEXT: [[TMP89:%.*]] = extractelement <16 x i1> [[TMP55]], i32 9 -; CHECK-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[NEG_1_6]], i32 [[TMP88]] -; CHECK-NEXT: [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP90]], [[SPEC_SELECT8_6]] -; CHECK-NEXT: [[TMP91:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]] -; CHECK-NEXT: [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP90]], i32 [[SPEC_SELECT8_6]] -; CHECK-NEXT: [[TMP92:%.*]] = extractelement <16 x i32> [[TMP53]], i32 10 -; CHECK-NEXT: [[NEG_2_6:%.*]] = sub nsw i32 0, [[TMP92]] -; CHECK-NEXT: [[TMP93:%.*]] = extractelement <16 x i1> [[TMP55]], i32 10 -; CHECK-NEXT: [[TMP94:%.*]] = select i1 [[TMP93]], i32 [[NEG_2_6]], i32 [[TMP92]] -; CHECK-NEXT: [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP94]], [[SPEC_SELECT8_1_6]] -; CHECK-NEXT: [[TMP95:%.*]] = or i1 [[CMP12_2_6]], [[TMP91]] -; CHECK-NEXT: [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP94]], i32 [[SPEC_SELECT8_1_6]] -; CHECK-NEXT: [[TMP96:%.*]] = extractelement <16 x i32> [[TMP53]], i32 11 -; CHECK-NEXT: 
[[NEG_3_6:%.*]] = sub nsw i32 0, [[TMP96]] -; CHECK-NEXT: [[TMP97:%.*]] = extractelement <16 x i1> [[TMP55]], i32 11 -; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP97]], i32 [[NEG_3_6]], i32 [[TMP96]] -; CHECK-NEXT: [[CMP12_3_6:%.*]] = icmp slt i32 [[TMP98]], [[SPEC_SELECT8_2_6]] -; CHECK-NEXT: [[TMP99:%.*]] = or i1 [[CMP12_3_6]], [[TMP95]] -; CHECK-NEXT: [[SPEC_SELECT_3_6:%.*]] = select i1 [[TMP99]], i32 6, i32 [[SPEC_SELECT_3_5]] -; CHECK-NEXT: [[SPEC_SELECT8_3_6:%.*]] = select i1 [[CMP12_3_6]], i32 [[TMP98]], i32 [[SPEC_SELECT8_2_6]] -; CHECK-NEXT: [[TMP100:%.*]] = extractelement <16 x i32> [[TMP53]], i32 12 -; CHECK-NEXT: [[NEG_7:%.*]] = sub nsw i32 0, [[TMP100]] -; CHECK-NEXT: [[TMP101:%.*]] = extractelement <16 x i1> [[TMP55]], i32 12 -; CHECK-NEXT: [[TMP102:%.*]] = select i1 [[TMP101]], i32 [[NEG_7]], i32 [[TMP100]] +; CHECK-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], i32 [[NEG_3_3]], i32 [[SUB_3_3]] +; CHECK-NEXT: [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP66]], [[SPEC_SELECT8_2_3]] +; CHECK-NEXT: [[TMP67:%.*]] = or i1 [[CMP12_3_3]], [[TMP64]] +; CHECK-NEXT: [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP67]], i32 3, i32 [[SPEC_SELECT_3_2]] +; CHECK-NEXT: [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP66]], i32 [[SPEC_SELECT8_2_3]] +; CHECK-NEXT: [[SUB_4:%.*]] = sub i32 [[TMP30]], [[TMP13]] +; CHECK-NEXT: [[TMP68:%.*]] = icmp slt i32 [[SUB_4]], 0 +; CHECK-NEXT: [[NEG_4:%.*]] = sub nsw i32 0, [[SUB_4]] +; CHECK-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[NEG_4]], i32 [[SUB_4]] +; CHECK-NEXT: [[CMP12_4:%.*]] = icmp slt i32 [[TMP69]], [[SPEC_SELECT8_3_3]] +; CHECK-NEXT: [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP69]], i32 [[SPEC_SELECT8_3_3]] +; CHECK-NEXT: [[SUB_1_4:%.*]] = sub i32 [[TMP30]], [[TMP14]] +; CHECK-NEXT: [[TMP70:%.*]] = icmp slt i32 [[SUB_1_4]], 0 +; CHECK-NEXT: [[NEG_1_4:%.*]] = sub nsw i32 0, [[SUB_1_4]] +; CHECK-NEXT: [[TMP71:%.*]] = select i1 [[TMP70]], i32 [[NEG_1_4]], i32 [[SUB_1_4]] +; CHECK-NEXT: [[CMP12_1_4:%.*]] = 
icmp slt i32 [[TMP71]], [[SPEC_SELECT8_4]] +; CHECK-NEXT: [[TMP72:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]] +; CHECK-NEXT: [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP71]], i32 [[SPEC_SELECT8_4]] +; CHECK-NEXT: [[SUB_2_4:%.*]] = sub i32 [[TMP30]], [[TMP15]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp slt i32 [[SUB_2_4]], 0 +; CHECK-NEXT: [[NEG_2_4:%.*]] = sub nsw i32 0, [[SUB_2_4]] +; CHECK-NEXT: [[TMP74:%.*]] = select i1 [[TMP73]], i32 [[NEG_2_4]], i32 [[SUB_2_4]] +; CHECK-NEXT: [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP74]], [[SPEC_SELECT8_1_4]] +; CHECK-NEXT: [[TMP75:%.*]] = or i1 [[CMP12_2_4]], [[TMP72]] +; CHECK-NEXT: [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP74]], i32 [[SPEC_SELECT8_1_4]] +; CHECK-NEXT: [[SUB_3_4:%.*]] = sub i32 [[TMP30]], [[TMP16]] +; CHECK-NEXT: [[TMP76:%.*]] = icmp slt i32 [[SUB_3_4]], 0 +; CHECK-NEXT: [[NEG_3_4:%.*]] = sub nsw i32 0, [[SUB_3_4]] +; CHECK-NEXT: [[TMP77:%.*]] = select i1 [[TMP76]], i32 [[NEG_3_4]], i32 [[SUB_3_4]] +; CHECK-NEXT: [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP77]], [[SPEC_SELECT8_2_4]] +; CHECK-NEXT: [[TMP78:%.*]] = or i1 [[CMP12_3_4]], [[TMP75]] +; CHECK-NEXT: [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP78]], i32 4, i32 [[SPEC_SELECT_3_3]] +; CHECK-NEXT: [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP77]], i32 [[SPEC_SELECT8_2_4]] +; CHECK-NEXT: [[SUB_5:%.*]] = sub i32 [[TMP30]], [[TMP17]] +; CHECK-NEXT: [[TMP79:%.*]] = icmp slt i32 [[SUB_5]], 0 +; CHECK-NEXT: [[NEG_5:%.*]] = sub nsw i32 0, [[SUB_5]] +; CHECK-NEXT: [[TMP80:%.*]] = select i1 [[TMP79]], i32 [[NEG_5]], i32 [[SUB_5]] +; CHECK-NEXT: [[CMP12_5:%.*]] = icmp slt i32 [[TMP80]], [[SPEC_SELECT8_3_4]] +; CHECK-NEXT: [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP80]], i32 [[SPEC_SELECT8_3_4]] +; CHECK-NEXT: [[SUB_1_5:%.*]] = sub i32 [[TMP30]], [[TMP18]] +; CHECK-NEXT: [[TMP81:%.*]] = icmp slt i32 [[SUB_1_5]], 0 +; CHECK-NEXT: [[NEG_1_5:%.*]] = sub nsw i32 0, [[SUB_1_5]] +; CHECK-NEXT: [[TMP82:%.*]] = select i1 
[[TMP81]], i32 [[NEG_1_5]], i32 [[SUB_1_5]] +; CHECK-NEXT: [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP82]], [[SPEC_SELECT8_5]] +; CHECK-NEXT: [[TMP83:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]] +; CHECK-NEXT: [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP82]], i32 [[SPEC_SELECT8_5]] +; CHECK-NEXT: [[SUB_2_5:%.*]] = sub i32 [[TMP30]], [[TMP19]] +; CHECK-NEXT: [[TMP84:%.*]] = icmp slt i32 [[SUB_2_5]], 0 +; CHECK-NEXT: [[NEG_2_5:%.*]] = sub nsw i32 0, [[SUB_2_5]] +; CHECK-NEXT: [[TMP85:%.*]] = select i1 [[TMP84]], i32 [[NEG_2_5]], i32 [[SUB_2_5]] +; CHECK-NEXT: [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP85]], [[SPEC_SELECT8_1_5]] +; CHECK-NEXT: [[TMP86:%.*]] = or i1 [[CMP12_2_5]], [[TMP83]] +; CHECK-NEXT: [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP85]], i32 [[SPEC_SELECT8_1_5]] +; CHECK-NEXT: [[SUB_3_5:%.*]] = sub i32 [[TMP30]], [[TMP20]] +; CHECK-NEXT: [[TMP87:%.*]] = icmp slt i32 [[SUB_3_5]], 0 +; CHECK-NEXT: [[NEG_3_5:%.*]] = sub nsw i32 0, [[SUB_3_5]] +; CHECK-NEXT: [[TMP88:%.*]] = select i1 [[TMP87]], i32 [[NEG_3_5]], i32 [[SUB_3_5]] +; CHECK-NEXT: [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP88]], [[SPEC_SELECT8_2_5]] +; CHECK-NEXT: [[TMP89:%.*]] = or i1 [[CMP12_3_5]], [[TMP86]] +; CHECK-NEXT: [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP89]], i32 5, i32 [[SPEC_SELECT_3_4]] +; CHECK-NEXT: [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP88]], i32 [[SPEC_SELECT8_2_5]] +; CHECK-NEXT: [[SUB_6:%.*]] = sub i32 [[TMP30]], [[TMP21]] +; CHECK-NEXT: [[TMP90:%.*]] = icmp slt i32 [[SUB_6]], 0 +; CHECK-NEXT: [[NEG_6:%.*]] = sub nsw i32 0, [[SUB_6]] +; CHECK-NEXT: [[TMP91:%.*]] = select i1 [[TMP90]], i32 [[NEG_6]], i32 [[SUB_6]] +; CHECK-NEXT: [[CMP12_6:%.*]] = icmp slt i32 [[TMP91]], [[SPEC_SELECT8_3_5]] +; CHECK-NEXT: [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP91]], i32 [[SPEC_SELECT8_3_5]] +; CHECK-NEXT: [[SUB_1_6:%.*]] = sub i32 [[TMP30]], [[TMP22]] +; CHECK-NEXT: [[TMP92:%.*]] = icmp slt i32 [[SUB_1_6]], 0 +; CHECK-NEXT: 
[[NEG_1_6:%.*]] = sub nsw i32 0, [[SUB_1_6]] +; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[NEG_1_6]], i32 [[SUB_1_6]] +; CHECK-NEXT: [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP93]], [[SPEC_SELECT8_6]] +; CHECK-NEXT: [[TMP94:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]] +; CHECK-NEXT: [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP93]], i32 [[SPEC_SELECT8_6]] +; CHECK-NEXT: [[SUB_2_6:%.*]] = sub i32 [[TMP30]], [[TMP23]] +; CHECK-NEXT: [[TMP95:%.*]] = icmp slt i32 [[SUB_2_6]], 0 +; CHECK-NEXT: [[NEG_2_6:%.*]] = sub nsw i32 0, [[SUB_2_6]] +; CHECK-NEXT: [[TMP96:%.*]] = select i1 [[TMP95]], i32 [[NEG_2_6]], i32 [[SUB_2_6]] +; CHECK-NEXT: [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP96]], [[SPEC_SELECT8_1_6]] +; CHECK-NEXT: [[TMP97:%.*]] = or i1 [[CMP12_2_6]], [[TMP94]] +; CHECK-NEXT: [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP96]], i32 [[SPEC_SELECT8_1_6]] +; CHECK-NEXT: [[SUB_3_6:%.*]] = sub i32 [[TMP30]], [[TMP24]] +; CHECK-NEXT: [[TMP98:%.*]] = icmp slt i32 [[SUB_3_6]], 0 +; CHECK-NEXT: [[NEG_3_6:%.*]] = sub nsw i32 0, [[SUB_3_6]] +; CHECK-NEXT: [[TMP99:%.*]] = select i1 [[TMP98]], i32 [[NEG_3_6]], i32 [[SUB_3_6]] +; CHECK-NEXT: [[CMP12_3_6:%.*]] = icmp slt i32 [[TMP99]], [[SPEC_SELECT8_2_6]] +; CHECK-NEXT: [[TMP100:%.*]] = or i1 [[CMP12_3_6]], [[TMP97]] +; CHECK-NEXT: [[SPEC_SELECT_3_6:%.*]] = select i1 [[TMP100]], i32 6, i32 [[SPEC_SELECT_3_5]] +; CHECK-NEXT: [[SPEC_SELECT8_3_6:%.*]] = select i1 [[CMP12_3_6]], i32 [[TMP99]], i32 [[SPEC_SELECT8_2_6]] +; CHECK-NEXT: [[SUB_7:%.*]] = sub i32 [[TMP30]], [[TMP25]] +; CHECK-NEXT: [[TMP101:%.*]] = icmp slt i32 [[SUB_7]], 0 +; CHECK-NEXT: [[NEG_7:%.*]] = sub nsw i32 0, [[SUB_7]] +; CHECK-NEXT: [[TMP102:%.*]] = select i1 [[TMP101]], i32 [[NEG_7]], i32 [[SUB_7]] ; CHECK-NEXT: [[CMP12_7:%.*]] = icmp slt i32 [[TMP102]], [[SPEC_SELECT8_3_6]] ; CHECK-NEXT: [[SPEC_SELECT8_7:%.*]] = select i1 [[CMP12_7]], i32 [[TMP102]], i32 [[SPEC_SELECT8_3_6]] -; CHECK-NEXT: [[TMP103:%.*]] = extractelement <16 x i32> 
[[TMP53]], i32 13 -; CHECK-NEXT: [[NEG_1_7:%.*]] = sub nsw i32 0, [[TMP103]] -; CHECK-NEXT: [[TMP104:%.*]] = extractelement <16 x i1> [[TMP55]], i32 13 -; CHECK-NEXT: [[TMP105:%.*]] = select i1 [[TMP104]], i32 [[NEG_1_7]], i32 [[TMP103]] -; CHECK-NEXT: [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP105]], [[SPEC_SELECT8_7]] -; CHECK-NEXT: [[TMP106:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]] -; CHECK-NEXT: [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP105]], i32 [[SPEC_SELECT8_7]] -; CHECK-NEXT: [[TMP107:%.*]] = extractelement <16 x i32> [[TMP53]], i32 14 -; CHECK-NEXT: [[NEG_2_7:%.*]] = sub nsw i32 0, [[TMP107]] -; CHECK-NEXT: [[TMP108:%.*]] = extractelement <16 x i1> [[TMP55]], i32 14 -; CHECK-NEXT: [[TMP109:%.*]] = select i1 [[TMP108]], i32 [[NEG_2_7]], i32 [[TMP107]] -; CHECK-NEXT: [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP109]], [[SPEC_SELECT8_1_7]] -; CHECK-NEXT: [[TMP110:%.*]] = or i1 [[CMP12_2_7]], [[TMP106]] -; CHECK-NEXT: [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP109]], i32 [[SPEC_SELECT8_1_7]] -; CHECK-NEXT: [[TMP111:%.*]] = extractelement <16 x i32> [[TMP53]], i32 15 -; CHECK-NEXT: [[NEG_3_7:%.*]] = sub nsw i32 0, [[TMP111]] -; CHECK-NEXT: [[TMP112:%.*]] = extractelement <16 x i1> [[TMP55]], i32 15 -; CHECK-NEXT: [[TMP113:%.*]] = select i1 [[TMP112]], i32 [[NEG_3_7]], i32 [[TMP111]] -; CHECK-NEXT: [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP113]], [[SPEC_SELECT8_2_7]] -; CHECK-NEXT: [[TMP114:%.*]] = or i1 [[CMP12_3_7]], [[TMP110]] -; CHECK-NEXT: [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP114]], i32 7, i32 [[SPEC_SELECT_3_6]] -; CHECK-NEXT: [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP113]], i32 [[SPEC_SELECT8_2_7]] +; CHECK-NEXT: [[SUB_1_7:%.*]] = sub i32 [[TMP30]], [[TMP26]] +; CHECK-NEXT: [[TMP103:%.*]] = icmp slt i32 [[SUB_1_7]], 0 +; CHECK-NEXT: [[NEG_1_7:%.*]] = sub nsw i32 0, [[SUB_1_7]] +; CHECK-NEXT: [[TMP104:%.*]] = select i1 [[TMP103]], i32 [[NEG_1_7]], i32 [[SUB_1_7]] +; CHECK-NEXT: [[CMP12_1_7:%.*]] = icmp slt i32 
[[TMP104]], [[SPEC_SELECT8_7]] +; CHECK-NEXT: [[TMP105:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]] +; CHECK-NEXT: [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP104]], i32 [[SPEC_SELECT8_7]] +; CHECK-NEXT: [[SUB_2_7:%.*]] = sub i32 [[TMP30]], [[TMP27]] +; CHECK-NEXT: [[TMP106:%.*]] = icmp slt i32 [[SUB_2_7]], 0 +; CHECK-NEXT: [[NEG_2_7:%.*]] = sub nsw i32 0, [[SUB_2_7]] +; CHECK-NEXT: [[TMP107:%.*]] = select i1 [[TMP106]], i32 [[NEG_2_7]], i32 [[SUB_2_7]] +; CHECK-NEXT: [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP107]], [[SPEC_SELECT8_1_7]] +; CHECK-NEXT: [[TMP108:%.*]] = or i1 [[CMP12_2_7]], [[TMP105]] +; CHECK-NEXT: [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP107]], i32 [[SPEC_SELECT8_1_7]] +; CHECK-NEXT: [[SUB_3_7:%.*]] = sub i32 [[TMP30]], [[TMP28]] +; CHECK-NEXT: [[TMP109:%.*]] = icmp slt i32 [[SUB_3_7]], 0 +; CHECK-NEXT: [[NEG_3_7:%.*]] = sub nsw i32 0, [[SUB_3_7]] +; CHECK-NEXT: [[TMP110:%.*]] = select i1 [[TMP109]], i32 [[NEG_3_7]], i32 [[SUB_3_7]] +; CHECK-NEXT: [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP110]], [[SPEC_SELECT8_2_7]] +; CHECK-NEXT: [[TMP111:%.*]] = or i1 [[CMP12_3_7]], [[TMP108]] +; CHECK-NEXT: [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP111]], i32 7, i32 [[SPEC_SELECT_3_6]] +; CHECK-NEXT: [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP110]], i32 [[SPEC_SELECT8_2_7]] ; CHECK-NEXT: [[K:%.*]] = getelementptr inbounds [366 x i32], ptr @l, i64 0, i64 [[INDVARS_IV]] ; CHECK-NEXT: store i32 [[SPEC_SELECT_3_7]], ptr [[K]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1