diff --git a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h --- a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h +++ b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h @@ -7,8 +7,9 @@ //===----------------------------------------------------------------------===// // /// \file -/// This pass converts vector operations into scalar operations, in order -/// to expose optimization opportunities on the individual scalar operations. +/// This pass converts vector operations into scalar operations (or, optionally, +/// operations on smaller vector widths), in order to expose optimization +/// opportunities on the individual scalar operations. /// It is mainly intended for targets that do not have vector units, but it /// may also be useful for revectorizing code to different vector widths. // @@ -26,24 +27,29 @@ class FunctionPass; struct ScalarizerPassOptions { - // These optional booleans correspond 1:1 to cl::opt options defined in + // These options correspond 1:1 to cl::opt options defined in // Scalarizer.cpp. When the cl::opt are specified, they take precedence. - // When the cl::opt are not specified, the present optional booleans allow to + // When the cl::opt are not specified, the present optional values allow to // override the cl::opt's default values. std::optional ScalarizeVariableInsertExtract; std::optional ScalarizeLoadStore; + std::optional ScalarizeMinBits; }; class ScalarizerPass : public PassInfoMixin { ScalarizerPassOptions Options; public: + ScalarizerPass() = default; + ScalarizerPass(const ScalarizerPassOptions &Options) : Options(Options) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); void setScalarizeVariableInsertExtract(bool Value) { Options.ScalarizeVariableInsertExtract = Value; } void setScalarizeLoadStore(bool Value) { Options.ScalarizeLoadStore = Value; } + void setScalarizeMinBits(unsigned Value) { Options.ScalarizeMinBits = Value; } }; /// Create a legacy pass manager instance of the Scalarizer pass diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// // -// This pass converts vector operations into scalar operations, in order -// to expose optimization opportunities on the individual scalar operations. +// This pass converts vector operations into scalar operations (or, optionally, +// operations on smaller vector widths), in order to expose optimization +// opportunities on the individual scalar operations. // It is mainly intended for targets that do not have vector units, but it // may also be useful for revectorizing code to different vector widths. // @@ -62,6 +63,11 @@ "scalarize-load-store", cl::init(false), cl::Hidden, cl::desc("Allow the scalarizer pass to scalarize loads and store")); +static cl::opt ClScalarizeMinBits( + "scalarize-min-bits", cl::init(0), cl::Hidden, + cl::desc("Instruct the scalarizer pass to attempt to keep values of a " + "minimum number of bits")); + namespace { BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) { @@ -88,6 +94,29 @@ // along with a pointer to their scattered forms. using GatherList = SmallVector, 16>; +struct VectorSplit { + // The type of the vector. + FixedVectorType *VecTy = nullptr; + + // The number of elements packed in a fragment (other than the remainder). + unsigned NumPacked = 0; + + // The number of fragments (scalars or smaller vectors) into which the vector + // shall be split. + unsigned NumFragments = 0; + + // The type of each complete fragment. + Type *SplitTy = nullptr; + + // The type of the remainder (last) fragment; null if all fragments are + // complete. + Type *RemainderTy = nullptr; + + Type *getFragmentType(unsigned I) const { + return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy; + } +}; + // Provides a very limited vector-like interface for lazily accessing one // component of a scattered vector or vector pointer. class Scatterer { @@ -97,23 +126,23 @@ // Scatter V into Size components. If new instructions are needed, // insert them before BBI in BB. If Cache is nonnull, use it to cache // the results. - Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, Type *PtrElemTy, - ValueVector *cachePtr = nullptr); + Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, + const VectorSplit &VS, ValueVector *cachePtr = nullptr); // Return component I, creating a new Value for it if necessary. Value *operator[](unsigned I); // Return the number of components. - unsigned size() const { return Size; } + unsigned size() const { return VS.NumFragments; } private: BasicBlock *BB; BasicBlock::iterator BBI; Value *V; - Type *PtrElemTy; + VectorSplit VS; + bool IsPointer; ValueVector *CachePtr; ValueVector Tmp; - unsigned Size; }; // FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp @@ -171,24 +200,74 @@ struct VectorLayout { VectorLayout() = default; - // Return the alignment of element I. - Align getElemAlign(unsigned I) { - return commonAlignment(VecAlign, I * ElemSize); + // Return the alignment of fragment I. + Align getFragmentAlign(unsigned I) { + return commonAlignment(VecAlign, I * SplitSize); } - // The type of the vector. - FixedVectorType *VecTy = nullptr; - - // The type of each element. - Type *ElemTy = nullptr; + // The split of the underlying vector type. + VectorSplit VS; // The alignment of the vector. Align VecAlign; - // The size of each element. - uint64_t ElemSize = 0; + // The size of each (non-remainder) fragment in bytes. + uint64_t SplitSize = 0; }; +/// Concatenate the given fragments to a single vector value of the type +/// described in @p VS. +static Value *concatenate(IRBuilder<> &Builder, ArrayRef Fragments, + const VectorSplit &VS, Twine Name) { + unsigned NumElements = VS.VecTy->getNumElements(); + SmallVector ExtendMask; + SmallVector InsertMask; + + if (VS.NumPacked > 1) { + // Prepare the shufflevector masks once and re-use them for all + // fragments. + ExtendMask.resize(NumElements, -1); + for (unsigned I = 0; I < VS.NumPacked; ++I) + ExtendMask[I] = I; + + InsertMask.resize(NumElements); + for (unsigned I = 0; I < NumElements; ++I) + InsertMask[I] = I; + } + + Value *Res = PoisonValue::get(VS.VecTy); + for (unsigned I = 0; I < VS.NumFragments; ++I) { + Value *Fragment = Fragments[I]; + + unsigned NumPacked = VS.NumPacked; + if (I == VS.NumFragments - 1 && VS.RemainderTy) { + if (auto *RemVecTy = dyn_cast(VS.RemainderTy)) + NumPacked = RemVecTy->getNumElements(); + else + NumPacked = 1; + } + + if (NumPacked == 1) { + Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked, + Name + ".upto" + Twine(I)); + } else { + Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask); + if (I == 0) { + Res = Fragment; + } else { + for (unsigned J = 0; J < NumPacked; ++J) + InsertMask[I * VS.NumPacked + J] = NumElements + J; + Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask, + Name + ".upto" + Twine(I)); + for (unsigned J = 0; J < NumPacked; ++J) + InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J; + } + } + } + + return Res; +} + template T getWithDefaultOverride(const cl::opt &ClOption, const std::optional &DefaultOverride) { @@ -205,8 +284,9 @@ getWithDefaultOverride(ClScalarizeVariableInsertExtract, Options.ScalarizeVariableInsertExtract)), ScalarizeLoadStore(getWithDefaultOverride(ClScalarizeLoadStore, - Options.ScalarizeLoadStore)) { - } + Options.ScalarizeLoadStore)), + ScalarizeMinBits(getWithDefaultOverride(ClScalarizeMinBits, + Options.ScalarizeMinBits)) {} bool visit(Function &F); @@ -230,11 +310,12 @@ bool visitCallInst(CallInst &ICI); private: - Scatterer scatter(Instruction *Point, Value *V, Type *PtrElemTy = nullptr); - void gather(Instruction *Op, const ValueVector &CV); + Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS); + void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS); void replaceUses(Instruction *Op, Value *CV); bool canTransferMetadata(unsigned Kind); void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV); + std::optional getVectorSplit(Type *Ty); std::optional getVectorLayout(Type *Ty, Align Alignment, const DataLayout &DL); bool finish(); @@ -256,6 +337,7 @@ const bool ScalarizeVariableInsertExtract; const bool ScalarizeLoadStore; + const unsigned ScalarizeMinBits; }; class ScalarizerLegacyPass : public FunctionPass { @@ -284,40 +366,60 @@ "Scalarize vector operations", false, false) Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, - Type *PtrElemTy, ValueVector *cachePtr) - : BB(bb), BBI(bbi), V(v), PtrElemTy(PtrElemTy), CachePtr(cachePtr) { + const VectorSplit &VS, ValueVector *cachePtr) + : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) { Type *Ty = V->getType(); if (Ty->isPointerTy()) { - assert(cast(Ty)->isOpaqueOrPointeeTypeMatches(PtrElemTy) && + assert(cast(Ty)->isOpaqueOrPointeeTypeMatches(VS.VecTy) && "Pointer element type mismatch"); - Ty = PtrElemTy; + IsPointer = true; + } else { + IsPointer = false; + } + if (!CachePtr) { + Tmp.resize(VS.NumFragments, nullptr); + } else { + assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() || + IsPointer) && + "Inconsistent vector sizes"); + if (VS.NumFragments > CachePtr->size()) + CachePtr->resize(VS.NumFragments, nullptr); } - Size = cast(Ty)->getNumElements(); - if (!CachePtr) - Tmp.resize(Size, nullptr); - else if (CachePtr->empty()) - CachePtr->resize(Size, nullptr); - else - assert(Size == CachePtr->size() && "Inconsistent vector sizes"); } // Return component I, creating a new Value for it if necessary. -Value *Scatterer::operator[](unsigned I) { - ValueVector &CV = (CachePtr ? *CachePtr : Tmp); +Value *Scatterer::operator[](unsigned Frag) { + ValueVector &CV = CachePtr ? *CachePtr : Tmp; // Try to reuse a previous value. - if (CV[I]) - return CV[I]; + if (CV[Frag]) + return CV[Frag]; IRBuilder<> Builder(BB, BBI); - if (PtrElemTy) { - Type *VectorElemTy = cast(PtrElemTy)->getElementType(); + if (IsPointer) { if (!CV[0]) { - Type *NewPtrTy = PointerType::get( - VectorElemTy, V->getType()->getPointerAddressSpace()); + Type *NewPtrTy = + PointerType::get(VS.SplitTy, V->getType()->getPointerAddressSpace()); CV[0] = Builder.CreateBitCast(V, NewPtrTy, V->getName() + ".i0"); } - if (I != 0) - CV[I] = Builder.CreateConstGEP1_32(VectorElemTy, CV[0], I, - V->getName() + ".i" + Twine(I)); + if (Frag != 0) + CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, CV[0], Frag, + V->getName() + ".i" + Twine(Frag)); + if (Frag == VS.NumFragments - 1 && VS.RemainderTy) { + Type *NewPtrTy = PointerType::get(VS.RemainderTy, + V->getType()->getPointerAddressSpace()); + CV[Frag] = Builder.CreateBitCast(CV[Frag], NewPtrTy); + } + return CV[Frag]; + } + + Type *FragmentTy = VS.getFragmentType(Frag); + + if (auto *VecTy = dyn_cast(FragmentTy)) { + SmallVector Mask; + for (unsigned J = 0; J < VecTy->getNumElements(); ++J) + Mask.push_back(Frag * VS.NumPacked + J); + CV[Frag] = + Builder.CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask, + V->getName() + ".i" + Twine(Frag)); } else { // Search through a chain of InsertElementInsts looking for element I. // Record other elements in the cache. The new V is still suitable @@ -331,20 +433,21 @@ break; unsigned J = Idx->getZExtValue(); V = Insert->getOperand(0); - if (I == J) { - CV[J] = Insert->getOperand(1); - return CV[J]; - } else if (!CV[J]) { + if (Frag * VS.NumPacked == J) { + CV[Frag] = Insert->getOperand(1); + return CV[Frag]; + } else if (VS.NumPacked == 1 && !CV[J]) { // Only cache the first entry we find for each index we're not actively // searching for. This prevents us from going too far up the chain and // caching incorrect entries. CV[J] = Insert->getOperand(1); } } - CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I), - V->getName() + ".i" + Twine(I)); + CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked, + V->getName() + ".i" + Twine(Frag)); } - return CV[I]; + + return CV[Frag]; } bool ScalarizerLegacyPass::runOnFunction(Function &F) { @@ -386,13 +489,13 @@ // Return a scattered form of V that can be accessed by Point. V must be a // vector or a pointer to a vector. Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V, - Type *PtrElemTy) { + const VectorSplit &VS) { if (Argument *VArg = dyn_cast(V)) { // Put the scattered form of arguments in the entry block, // so that it can be used everywhere. Function *F = VArg->getParent(); BasicBlock *BB = &F->getEntryBlock(); - return Scatterer(BB, BB->begin(), V, PtrElemTy, &Scattered[{V, PtrElemTy}]); + return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]); } if (Instruction *VOp = dyn_cast(V)) { // When scalarizing PHI nodes we might try to examine/rewrite InsertElement @@ -403,29 +506,30 @@ // need to analyse them further. if (!DT->isReachableFromEntry(VOp->getParent())) return Scatterer(Point->getParent(), Point->getIterator(), - PoisonValue::get(V->getType()), PtrElemTy); + PoisonValue::get(V->getType()), VS); // Put the scattered form of an instruction directly after the // instruction, skipping over PHI nodes and debug intrinsics. BasicBlock *BB = VOp->getParent(); return Scatterer( - BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V, - PtrElemTy, &Scattered[{V, PtrElemTy}]); + BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V, VS, + &Scattered[{V, VS.SplitTy}]); } // In the fallback case, just put the scattered before Point and // keep the result local to Point. - return Scatterer(Point->getParent(), Point->getIterator(), V, PtrElemTy); + return Scatterer(Point->getParent(), Point->getIterator(), V, VS); } // Replace Op with the gathered form of the components in CV. Defer the // deletion of Op and creation of the gathered form to the end of the pass, // so that we can avoid creating the gathered form if all uses of Op are // replaced with uses of CV. -void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) { +void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV, + const VectorSplit &VS) { transferMetadataAndIRFlags(Op, CV); // If we already have a scattered form of Op (created from ExtractElements // of Op itself), replace them with the new form. - ValueVector &SV = Scattered[{Op, nullptr}]; + ValueVector &SV = Scattered[{Op, VS.SplitTy}]; if (!SV.empty()) { for (unsigned I = 0, E = SV.size(); I != E; ++I) { Value *V = SV[I]; @@ -483,23 +587,57 @@ } } +// Determine how Ty is split, if at all. +std::optional ScalarizerVisitor::getVectorSplit(Type *Ty) { + VectorSplit Split; + Split.VecTy = dyn_cast(Ty); + if (!Split.VecTy) + return {}; + + unsigned NumElems = Split.VecTy->getNumElements(); + Type *ElemTy = Split.VecTy->getElementType(); + + if (NumElems == 1 || ElemTy->isPointerTy() || + ElemTy->getScalarSizeInBits() >= ScalarizeMinBits) { + Split.NumPacked = 1; + Split.NumFragments = NumElems; + Split.SplitTy = ElemTy; + } else { + Split.NumPacked = ScalarizeMinBits / ElemTy->getScalarSizeInBits(); + if (Split.NumPacked >= NumElems) + return {}; + + Split.NumFragments = divideCeil(NumElems, Split.NumPacked); + Split.SplitTy = FixedVectorType::get(ElemTy, Split.NumPacked); + + unsigned RemainderElems = NumElems % Split.NumPacked; + if (RemainderElems > 1) + Split.RemainderTy = FixedVectorType::get(ElemTy, RemainderElems); + else if (RemainderElems == 1) + Split.RemainderTy = ElemTy; + } + + return Split; +} + // Try to fill in Layout from Ty, returning true on success. Alignment is // the alignment of the vector, or std::nullopt if the ABI default should be // used. std::optional ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment, const DataLayout &DL) { + std::optional VS = getVectorSplit(Ty); + if (!VS) + return {}; + VectorLayout Layout; - // Make sure we're dealing with a vector. - Layout.VecTy = dyn_cast(Ty); - if (!Layout.VecTy) - return std::nullopt; - // Check that we're dealing with full-byte elements. - Layout.ElemTy = Layout.VecTy->getElementType(); - if (!DL.typeSizeEqualsStoreSize(Layout.ElemTy)) - return std::nullopt; + Layout.VS = *VS; + // Check that we're dealing with full-byte fragments. + if (!DL.typeSizeEqualsStoreSize(VS->SplitTy) || + (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(VS->RemainderTy))) + return {}; Layout.VecAlign = Alignment; - Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy); + Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy); return Layout; } @@ -507,19 +645,27 @@ // to create an instruction like I with operand X and name Name. template bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) { - auto *VT = dyn_cast(I.getType()); - if (!VT) + std::optional VS = getVectorSplit(I.getType()); + if (!VS) return false; - unsigned NumElems = VT->getNumElements(); + std::optional OpVS; + if (I.getOperand(0)->getType() == I.getType()) { + OpVS = VS; + } else { + OpVS = getVectorSplit(I.getOperand(0)->getType()); + if (!OpVS || VS->NumPacked != OpVS->NumPacked) + return false; + } + IRBuilder<> Builder(&I); - Scatterer Op = scatter(&I, I.getOperand(0)); - assert(Op.size() == NumElems && "Mismatched unary operation"); + Scatterer Op = scatter(&I, I.getOperand(0), *OpVS); + assert(Op.size() == VS->NumFragments && "Mismatched unary operation"); ValueVector Res; - Res.resize(NumElems); - for (unsigned Elem = 0; Elem < NumElems; ++Elem) - Res[Elem] = Split(Builder, Op[Elem], I.getName() + ".i" + Twine(Elem)); - gather(&I, Res); + Res.resize(VS->NumFragments); + for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) + Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag)); + gather(&I, Res, *VS); return true; } @@ -527,24 +673,32 @@ // to create an instruction like I with operands X and Y and name Name. template bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) { - auto *VT = dyn_cast(I.getType()); - if (!VT) + std::optional VS = getVectorSplit(I.getType()); + if (!VS) return false; - unsigned NumElems = VT->getNumElements(); + std::optional OpVS; + if (I.getOperand(0)->getType() == I.getType()) { + OpVS = VS; + } else { + OpVS = getVectorSplit(I.getOperand(0)->getType()); + if (!OpVS || VS->NumPacked != OpVS->NumPacked) + return false; + } + IRBuilder<> Builder(&I); - Scatterer VOp0 = scatter(&I, I.getOperand(0)); - Scatterer VOp1 = scatter(&I, I.getOperand(1)); - assert(VOp0.size() == NumElems && "Mismatched binary operation"); - assert(VOp1.size() == NumElems && "Mismatched binary operation"); + Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS); + Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS); + assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation"); + assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation"); ValueVector Res; - Res.resize(NumElems); - for (unsigned Elem = 0; Elem < NumElems; ++Elem) { - Value *Op0 = VOp0[Elem]; - Value *Op1 = VOp1[Elem]; - Res[Elem] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Elem)); + Res.resize(VS->NumFragments); + for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) { + Value *Op0 = VOp0[Frag]; + Value *Op1 = VOp1[Frag]; + Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag)); } - gather(&I, Res); + gather(&I, Res, *VS); return true; } @@ -552,18 +706,11 @@ return isTriviallyVectorizable(ID); } -// All of the current scalarizable intrinsics only have one mangled type. -static Function *getScalarIntrinsicDeclaration(Module *M, - Intrinsic::ID ID, - ArrayRef Tys) { - return Intrinsic::getDeclaration(M, ID, Tys); -} - /// If a call to a vector typed intrinsic function, split into a scalar call per /// element if possible for the intrinsic. bool ScalarizerVisitor::splitCall(CallInst &CI) { - auto *VT = dyn_cast(CI.getType()); - if (!VT) + std::optional VS = getVectorSplit(CI.getType()); + if (!VS) return false; Function *F = CI.getCalledFunction(); @@ -574,28 +721,41 @@ if (ID == Intrinsic::not_intrinsic || !isTriviallyScalariable(ID)) return false; - unsigned NumElems = VT->getNumElements(); + // unsigned NumElems = VT->getNumElements(); unsigned NumArgs = CI.arg_size(); ValueVector ScalarOperands(NumArgs); SmallVector Scattered(NumArgs); - - Scattered.resize(NumArgs); + SmallVector OverloadIdx(NumArgs, -1); SmallVector Tys; // Add return type if intrinsic is overloaded on it. if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1)) - Tys.push_back(VT->getScalarType()); + Tys.push_back(VS->SplitTy); // Assumes that any vector type has the same number of elements as the return // vector type, which is true for all current intrinsics. for (unsigned I = 0; I != NumArgs; ++I) { Value *OpI = CI.getOperand(I); - if (OpI->getType()->isVectorTy()) { - Scattered[I] = scatter(&CI, OpI); - assert(Scattered[I].size() == NumElems && "mismatched call operands"); - if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) - Tys.push_back(OpI->getType()->getScalarType()); + if (auto *OpVecTy = dyn_cast(OpI->getType())) { + assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements()); + std::optional OpVS = getVectorSplit(OpI->getType()); + if (!OpVS || OpVS->NumPacked != VS->NumPacked) { + // The natural split of the operand doesn't match the result. This could + // happen if the vector elements are different and the ScalarizeMinBits + // option is used. + // + // We could in principle handle this case as well, at the cost of + // complicating the scattering machinery to support multiple scattering + // granularities for a single value. + return false; + } + + Scattered[I] = scatter(&CI, OpI, *OpVS); + if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) { + OverloadIdx[I] = Tys.size(); + Tys.push_back(OpVS->SplitTy); + } } else { ScalarOperands[I] = OpI; if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) @@ -603,49 +763,67 @@ } } - ValueVector Res(NumElems); + ValueVector Res(VS->NumFragments); ValueVector ScalarCallOps(NumArgs); - Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, Tys); + Function *NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys); IRBuilder<> Builder(&CI); // Perform actual scalarization, taking care to preserve any scalar operands. - for (unsigned Elem = 0; Elem < NumElems; ++Elem) { + for (unsigned I = 0; I < VS->NumFragments; ++I) { + bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy; ScalarCallOps.clear(); + if (IsRemainder) + Tys[0] = VS->RemainderTy; + for (unsigned J = 0; J != NumArgs; ++J) { - if (isVectorIntrinsicWithScalarOpAtArg(ID, J)) + if (isVectorIntrinsicWithScalarOpAtArg(ID, J)) { ScalarCallOps.push_back(ScalarOperands[J]); - else - ScalarCallOps.push_back(Scattered[J][Elem]); + } else { + ScalarCallOps.push_back(Scattered[J][I]); + if (IsRemainder && OverloadIdx[J] >= 0) + Tys[OverloadIdx[J]] = Scattered[J][I]->getType(); + } } - Res[Elem] = Builder.CreateCall(NewIntrin, ScalarCallOps, - CI.getName() + ".i" + Twine(Elem)); + if (IsRemainder) + NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys); + + Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps, + CI.getName() + ".i" + Twine(I)); } - gather(&CI, Res); + gather(&CI, Res, *VS); return true; } bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) { - auto *VT = dyn_cast(SI.getType()); - if (!VT) + std::optional VS = getVectorSplit(SI.getType()); + if (!VS) return false; - unsigned NumElems = VT->getNumElements(); + std::optional CondVS; + if (isa(SI.getCondition()->getType())) { + CondVS = getVectorSplit(SI.getCondition()->getType()); + if (!CondVS || CondVS->NumPacked != VS->NumPacked) { + // This happens when ScalarizeMinBits is used. + return false; + } + } + IRBuilder<> Builder(&SI); - Scatterer VOp1 = scatter(&SI, SI.getOperand(1)); - Scatterer VOp2 = scatter(&SI, SI.getOperand(2)); - assert(VOp1.size() == NumElems && "Mismatched select"); - assert(VOp2.size() == NumElems && "Mismatched select"); + Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS); + Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS); + assert(VOp1.size() == VS->NumFragments && "Mismatched select"); + assert(VOp2.size() == VS->NumFragments && "Mismatched select"); ValueVector Res; - Res.resize(NumElems); + Res.resize(VS->NumFragments); - if (SI.getOperand(0)->getType()->isVectorTy()) { - Scatterer VOp0 = scatter(&SI, SI.getOperand(0)); - assert(VOp0.size() == NumElems && "Mismatched select"); - for (unsigned I = 0; I < NumElems; ++I) { + if (CondVS) { + Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS); + assert(VOp0.size() == CondVS->NumFragments && "Mismatched select"); + for (unsigned I = 0; I < VS->NumFragments; ++I) { Value *Op0 = VOp0[I]; Value *Op1 = VOp1[I]; Value *Op2 = VOp2[I]; @@ -654,14 +832,14 @@ } } else { Value *Op0 = SI.getOperand(0); - for (unsigned I = 0; I < NumElems; ++I) { + for (unsigned I = 0; I < VS->NumFragments; ++I) { Value *Op1 = VOp1[I]; Value *Op2 = VOp2[I]; Res[I] = Builder.CreateSelect(Op0, Op1, Op2, SI.getName() + ".i" + Twine(I)); } } - gather(&SI, Res); + gather(&SI, Res, *VS); return true; } @@ -682,146 +860,194 @@ } bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { - auto *VT = dyn_cast(GEPI.getType()); - if (!VT) + std::optional VS = getVectorSplit(GEPI.getType()); + if (!VS) return false; IRBuilder<> Builder(&GEPI); - unsigned NumElems = VT->getNumElements(); unsigned NumIndices = GEPI.getNumIndices(); - // The base pointer might be scalar even if it's a vector GEP. In those cases, - // splat the pointer into a vector value, and scatter that vector. - Value *Op0 = GEPI.getOperand(0); - if (!Op0->getType()->isVectorTy()) - Op0 = Builder.CreateVectorSplat(NumElems, Op0); - Scatterer Base = scatter(&GEPI, Op0); - - SmallVector Ops; - Ops.resize(NumIndices); - for (unsigned I = 0; I < NumIndices; ++I) { - Value *Op = GEPI.getOperand(I + 1); - - // The indices might be scalars even if it's a vector GEP. In those cases, - // splat the scalar into a vector value, and scatter that vector. - if (!Op->getType()->isVectorTy()) - Op = Builder.CreateVectorSplat(NumElems, Op); - - Ops[I] = scatter(&GEPI, Op); + // The base pointer and indices might be scalar even if it's a vector GEP. + SmallVector ScalarOps{1 + NumIndices}; + SmallVector ScatterOps{1 + NumIndices}; + + for (unsigned I = 0; I < 1 + NumIndices; ++I) { + if (auto *VecTy = + dyn_cast(GEPI.getOperand(I)->getType())) { + std::optional OpVS = getVectorSplit(VecTy); + if (!OpVS || OpVS->NumPacked != VS->NumPacked) { + // This can happen when ScalarizeMinBits is used. + return false; + } + ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS); + } else { + ScalarOps[I] = GEPI.getOperand(I); + } } ValueVector Res; - Res.resize(NumElems); - for (unsigned I = 0; I < NumElems; ++I) { - SmallVector Indices; - Indices.resize(NumIndices); - for (unsigned J = 0; J < NumIndices; ++J) - Indices[J] = Ops[J][I]; - Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices, + Res.resize(VS->NumFragments); + for (unsigned I = 0; I < VS->NumFragments; ++I) { + SmallVector SplitOps; + SplitOps.resize(1 + NumIndices); + for (unsigned J = 0; J < 1 + NumIndices; ++J) { + if (ScalarOps[J]) + SplitOps[J] = ScalarOps[J]; + else + SplitOps[J] = ScatterOps[J][I]; + } + Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), SplitOps[0], + makeArrayRef(SplitOps).drop_front(), GEPI.getName() + ".i" + Twine(I)); if (GEPI.isInBounds()) if (GetElementPtrInst *NewGEPI = dyn_cast(Res[I])) NewGEPI->setIsInBounds(); } - gather(&GEPI, Res); + gather(&GEPI, Res, *VS); return true; } bool ScalarizerVisitor::visitCastInst(CastInst &CI) { - auto *VT = dyn_cast(CI.getDestTy()); - if (!VT) + std::optional DestVS = getVectorSplit(CI.getDestTy()); + if (!DestVS) + return false; + + std::optional SrcVS = getVectorSplit(CI.getSrcTy()); + if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked) return false; - unsigned NumElems = VT->getNumElements(); IRBuilder<> Builder(&CI); - Scatterer Op0 = scatter(&CI, CI.getOperand(0)); - assert(Op0.size() == NumElems && "Mismatched cast"); + Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS); + assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast"); ValueVector Res; - Res.resize(NumElems); - for (unsigned I = 0; I < NumElems; ++I) - Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(), - CI.getName() + ".i" + Twine(I)); - gather(&CI, Res); + Res.resize(DestVS->NumFragments); + for (unsigned I = 0; I < DestVS->NumFragments; ++I) + Res[I] = + Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I), + CI.getName() + ".i" + Twine(I)); + gather(&CI, Res, *DestVS); return true; } bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) { - auto *DstVT = dyn_cast(BCI.getDestTy()); - auto *SrcVT = dyn_cast(BCI.getSrcTy()); - if (!DstVT || !SrcVT) + std::optional DstVS = getVectorSplit(BCI.getDestTy()); + std::optional SrcVS = getVectorSplit(BCI.getSrcTy()); + if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy) return false; - unsigned DstNumElems = DstVT->getNumElements(); - unsigned SrcNumElems = SrcVT->getNumElements(); + const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy(); + + // Vectors of pointers are always fully scalarized. + assert(!isPointerTy || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1)); + IRBuilder<> Builder(&BCI); - Scatterer Op0 = scatter(&BCI, BCI.getOperand(0)); + Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS); ValueVector Res; - Res.resize(DstNumElems); + Res.resize(DstVS->NumFragments); - if (DstNumElems == SrcNumElems) { - for (unsigned I = 0; I < DstNumElems; ++I) - Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(), + unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits(); + unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits(); + + if (isPointerTy || DstSplitBits == SrcSplitBits) { + assert(DstVS->NumFragments == SrcVS->NumFragments); + for (unsigned I = 0; I < DstVS->NumFragments; ++I) { + Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I), BCI.getName() + ".i" + Twine(I)); - } else if (DstNumElems > SrcNumElems) { - // -> . Convert each t1 to and copy the - // individual elements to the destination. - unsigned FanOut = DstNumElems / SrcNumElems; - auto *MidTy = FixedVectorType::get(DstVT->getElementType(), FanOut); + } + } else if (SrcSplitBits % DstSplitBits == 0) { + // Convert each source fragment to the same-sized destination vector and + // then scatter the result to the destination. + VectorSplit MidVS; + MidVS.NumPacked = DstVS->NumPacked; + MidVS.NumFragments = SrcSplitBits / DstSplitBits; + MidVS.VecTy = FixedVectorType::get(DstVS->VecTy->getElementType(), + MidVS.NumPacked * MidVS.NumFragments); + MidVS.SplitTy = DstVS->SplitTy; + unsigned ResI = 0; - for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) { - Value *V = Op0[Op0I]; - Instruction *VI; + for (unsigned I = 0; I < SrcVS->NumFragments; ++I) { + Value *V = Op0[I]; + // Look through any existing bitcasts before converting to . // In the best case, the resulting conversion might be a no-op. + Instruction *VI; while ((VI = dyn_cast(V)) && VI->getOpcode() == Instruction::BitCast) V = VI->getOperand(0); - V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast"); - Scatterer Mid = scatter(&BCI, V); - for (unsigned MidI = 0; MidI < FanOut; ++MidI) - Res[ResI++] = Mid[MidI]; + + V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast"); + + Scatterer Mid = scatter(&BCI, V, MidVS); + for (unsigned J = 0; J < MidVS.NumFragments; ++J) + Res[ResI++] = Mid[J]; } - } else { - // -> . Convert each group of into a t2. - unsigned FanIn = SrcNumElems / DstNumElems; - auto *MidTy = FixedVectorType::get(SrcVT->getElementType(), FanIn); - unsigned Op0I = 0; - for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) { - Value *V = PoisonValue::get(MidTy); - for (unsigned MidI = 0; MidI < FanIn; ++MidI) - V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI), - BCI.getName() + ".i" + Twine(ResI) - + ".upto" + Twine(MidI)); - Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(), - BCI.getName() + ".i" + Twine(ResI)); + } else if (DstSplitBits % SrcSplitBits == 0) { + // Gather enough source fragments to make up a destination fragment and + // then convert to the destination type. + VectorSplit MidVS; + MidVS.NumFragments = DstSplitBits / SrcSplitBits; + MidVS.NumPacked = SrcVS->NumPacked; + MidVS.VecTy = FixedVectorType::get(SrcVS->VecTy->getElementType(), + MidVS.NumPacked * MidVS.NumFragments); + MidVS.SplitTy = SrcVS->SplitTy; + + unsigned SrcI = 0; + SmallVector ConcatOps; + ConcatOps.resize(MidVS.NumFragments); + for (unsigned I = 0; I < DstVS->NumFragments; ++I) { + for (unsigned J = 0; J < MidVS.NumFragments; ++J) + ConcatOps[J] = Op0[SrcI++]; + Value *V = concatenate(Builder, ConcatOps, MidVS, + BCI.getName() + ".i" + Twine(I)); + Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I), + BCI.getName() + ".i" + Twine(I)); } + } else { + return false; } - gather(&BCI, Res); + + gather(&BCI, Res, *DstVS); return true; } bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) { - auto *VT = dyn_cast(IEI.getType()); - if (!VT) + std::optional VS = getVectorSplit(IEI.getType()); + if (!VS) return false; - unsigned NumElems = VT->getNumElements(); IRBuilder<> Builder(&IEI); - Scatterer Op0 = scatter(&IEI, IEI.getOperand(0)); + Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS); Value *NewElt = IEI.getOperand(1); Value *InsIdx = IEI.getOperand(2); ValueVector Res; - Res.resize(NumElems); + Res.resize(VS->NumFragments); if (auto *CI = dyn_cast(InsIdx)) { - for (unsigned I = 0; I < NumElems; ++I) - Res[I] = CI->getValue().getZExtValue() == I ? NewElt : Op0[I]; + unsigned Idx = CI->getZExtValue(); + unsigned Fragment = Idx / VS->NumPacked; + for (unsigned I = 0; I < VS->NumFragments; ++I) { + if (I == Fragment) { + bool IsPacked = VS->NumPacked > 1; + if (Fragment == VS->NumFragments - 1 && VS->RemainderTy && + !VS->RemainderTy->isVectorTy()) + IsPacked = false; + if (IsPacked) { + Res[I] = + Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked); + } else { + Res[I] = NewElt; + } + } else { + Res[I] = Op0[I]; + } + } } else { - if (!ScalarizeVariableInsertExtract) + // Never split a variable insertelement that isn't fully scalarized. + if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1) return false; - for (unsigned I = 0; I < NumElems; ++I) { + for (unsigned I = 0; I < VS->NumFragments; ++I) { Value *ShouldReplace = Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I), InsIdx->getName() + ".is." + Twine(I)); @@ -831,31 +1057,39 @@ } } - gather(&IEI, Res); + gather(&IEI, Res, *VS); return true; } bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) { - auto *VT = dyn_cast(EEI.getOperand(0)->getType()); - if (!VT) + std::optional VS = getVectorSplit(EEI.getOperand(0)->getType()); + if (!VS) return false; - unsigned NumSrcElems = VT->getNumElements(); IRBuilder<> Builder(&EEI); - Scatterer Op0 = scatter(&EEI, EEI.getOperand(0)); + Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS); Value *ExtIdx = EEI.getOperand(1); if (auto *CI = dyn_cast(ExtIdx)) { - Value *Res = Op0[CI->getValue().getZExtValue()]; + unsigned Idx = CI->getZExtValue(); + unsigned Fragment = Idx / VS->NumPacked; + Value *Res = Op0[Fragment]; + bool IsPacked = VS->NumPacked > 1; + if (Fragment == VS->NumFragments - 1 && VS->RemainderTy && + !VS->RemainderTy->isVectorTy()) + IsPacked = false; + if (IsPacked) + Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked); replaceUses(&EEI, Res); return true; } - if (!ScalarizeVariableInsertExtract) + // Never split a variable extractelement that isn't fully scalarized. + if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1) return false; - Value *Res = PoisonValue::get(VT->getElementType()); - for (unsigned I = 0; I < NumSrcElems; ++I) { + Value *Res = PoisonValue::get(VS->VecTy->getElementType()); + for (unsigned I = 0; I < VS->NumFragments; ++I) { Value *ShouldExtract = Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I), ExtIdx->getName() + ".is." + Twine(I)); @@ -868,51 +1102,51 @@ } bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) { - auto *VT = dyn_cast(SVI.getType()); - if (!VT) + std::optional VS = getVectorSplit(SVI.getType()); + std::optional VSOp = getVectorSplit(SVI.getOperand(0)->getType()); + if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1) return false; - unsigned NumElems = VT->getNumElements(); - Scatterer Op0 = scatter(&SVI, SVI.getOperand(0)); - Scatterer Op1 = scatter(&SVI, SVI.getOperand(1)); + Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp); + Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp); ValueVector Res; - Res.resize(NumElems); + Res.resize(VS->NumFragments); - for (unsigned I = 0; I < NumElems; ++I) { + for (unsigned I = 0; I < VS->NumFragments; ++I) { int Selector = SVI.getMaskValue(I); if (Selector < 0) - Res[I] = UndefValue::get(VT->getElementType()); + Res[I] = UndefValue::get(VS->VecTy->getElementType()); else if (unsigned(Selector) < Op0.size()) Res[I] = Op0[Selector]; else Res[I] = Op1[Selector - Op0.size()]; } - gather(&SVI, Res); + gather(&SVI, Res, *VS); return true; } bool ScalarizerVisitor::visitPHINode(PHINode &PHI) { - auto *VT = dyn_cast(PHI.getType()); - if (!VT) + std::optional VS = getVectorSplit(PHI.getType()); + if (!VS) return false; - unsigned NumElems = cast(VT)->getNumElements(); IRBuilder<> Builder(&PHI); ValueVector Res; - Res.resize(NumElems); + Res.resize(VS->NumFragments); unsigned NumOps = PHI.getNumOperands(); - for (unsigned I = 0; I < NumElems; ++I) - Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps, + for (unsigned I = 0; I < VS->NumFragments; ++I) { + Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps, PHI.getName() + ".i" + Twine(I)); + } for (unsigned I = 0; I < NumOps; ++I) { - Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I)); + Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS); BasicBlock *IncomingBlock = PHI.getIncomingBlock(I); - for (unsigned J = 0; J < NumElems; ++J) + for (unsigned J = 0; J < VS->NumFragments; ++J) cast(Res[J])->addIncoming(Op[J], IncomingBlock); } - gather(&PHI, Res); + gather(&PHI, Res, *VS); return true; } @@ -927,17 +1161,17 @@ if (!Layout) return false; - unsigned NumElems = cast(Layout->VecTy)->getNumElements(); IRBuilder<> Builder(&LI); - Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), LI.getType()); + Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), Layout->VS); ValueVector Res; - Res.resize(NumElems); + Res.resize(Layout->VS.NumFragments); - for (unsigned I = 0; I < NumElems; ++I) - Res[I] = Builder.CreateAlignedLoad(Layout->VecTy->getElementType(), Ptr[I], - Align(Layout->getElemAlign(I)), + for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) { + Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I], + Align(Layout->getFragmentAlign(I)), LI.getName() + ".i" + Twine(I)); - gather(&LI, Res); + } + gather(&LI, Res, Layout->VS); return true; } @@ -953,17 +1187,17 @@ if (!Layout) return false; - unsigned NumElems = cast(Layout->VecTy)->getNumElements(); IRBuilder<> Builder(&SI); - Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), FullValue->getType()); - Scatterer VVal = scatter(&SI, FullValue); + Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS); + Scatterer VVal = scatter(&SI, FullValue, Layout->VS); ValueVector Stores; - Stores.resize(NumElems); - for (unsigned I = 0; I < NumElems; ++I) { + Stores.resize(Layout->VS.NumFragments); + for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) { Value *Val = VVal[I]; Value *Ptr = VPtr[I]; - Stores[I] = Builder.CreateAlignedStore(Val, Ptr, Layout->getElemAlign(I)); + Stores[I] = + Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I)); } transferMetadataAndIRFlags(&SI, Stores); return true; @@ -985,17 +1219,19 @@ ValueVector &CV = *GMI.second; if (!Op->use_empty()) { // The value is still needed, so recreate it using a series of - // InsertElements. - Value *Res = PoisonValue::get(Op->getType()); + // insertelements and/or shufflevectors. + Value *Res; if (auto *Ty = dyn_cast(Op->getType())) { BasicBlock *BB = Op->getParent(); - unsigned Count = Ty->getNumElements(); IRBuilder<> Builder(Op); if (isa(Op)) Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); - for (unsigned I = 0; I < Count; ++I) - Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I), - Op->getName() + ".upto" + Twine(I)); + + VectorSplit VS = *getVectorSplit(Ty); + assert(VS.NumFragments == CV.size()); + + Res = concatenate(Builder, CV, VS, Op->getName()); + Res->takeName(Op); } else { assert(CV.size() == 1 && Op->getType() == CV[0]->getType()); diff --git a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll --- a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll +++ b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll @@ -8,10 +8,10 @@ define void @f1(<4 x float> %init, ptr %base, i32 %count) { ; CHECK-LABEL: @f1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i32 0 -; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i32 1 -; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i32 2 -; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i32 3 +; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i64 0 +; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i64 1 +; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i64 2 +; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i64 3 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -32,18 +32,18 @@ ; CHECK-NEXT: [[ADD_I1:%.*]] = fadd float [[VAL_I1]], [[VAL_I3]] ; CHECK-NEXT: [[ADD_I2:%.*]] = fadd float [[ACC_I0]], [[ACC_I2]] ; CHECK-NEXT: [[ADD_I3:%.*]] = fadd float [[ACC_I1]], [[ACC_I3]] -; CHECK-NEXT: [[ADD_UPTO0:%.*]] = insertelement <4 x float> poison, float [[ADD_I0]], i32 0 -; CHECK-NEXT: [[ADD_UPTO1:%.*]] = insertelement <4 x float> [[ADD_UPTO0]], float [[ADD_I1]], i32 1 -; CHECK-NEXT: [[ADD_UPTO2:%.*]] = insertelement <4 x float> [[ADD_UPTO1]], float [[ADD_I2]], i32 2 -; CHECK-NEXT: [[ADD:%.*]] = insertelement <4 x float> [[ADD_UPTO2]], float [[ADD_I3]], i32 3 +; CHECK-NEXT: [[ADD_UPTO0:%.*]] = insertelement <4 x float> poison, float [[ADD_I0]], i64 0 +; CHECK-NEXT: [[ADD_UPTO1:%.*]] = insertelement <4 x float> [[ADD_UPTO0]], float [[ADD_I1]], i64 1 +; CHECK-NEXT: [[ADD_UPTO2:%.*]] = insertelement <4 x float> [[ADD_UPTO1]], float [[ADD_I2]], i64 2 +; CHECK-NEXT: [[ADD:%.*]] = insertelement <4 x float> [[ADD_UPTO2]], float [[ADD_I3]], i64 3 ; CHECK-NEXT: [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[ADD]]) -; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0 +; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i64 0 ; CHECK-NEXT: [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00 -; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1 +; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i64 1 ; CHECK-NEXT: [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00 -; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2 +; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i64 2 ; CHECK-NEXT: [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00 -; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3 +; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i64 3 ; CHECK-NEXT: [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00 ; CHECK-NEXT: [[SEL_I0]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00 ; CHECK-NEXT: [[SEL_I1]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00 @@ -94,10 +94,10 @@ define void @f2(<4 x i32> %init, ptr %base, i32 %count) { ; CHECK-LABEL: @f2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i32 0 -; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i32 1 -; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i32 2 -; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i32 3 +; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i64 0 +; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i64 1 +; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i64 2 +; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i64 3 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -282,18 +282,18 @@ ; Check that fpmath information is preserved. define <4 x float> @f6(<4 x float> %x) { ; CHECK-LABEL: @f6( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath !9 -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <4 x float> [[X]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <4 x float> [[X]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath !9 -; CHECK-NEXT: [[X_I2:%.*]] = extractelement <4 x float> [[X]], i32 2 +; CHECK-NEXT: [[X_I2:%.*]] = extractelement <4 x float> [[X]], i64 2 ; CHECK-NEXT: [[RES_I2:%.*]] = fadd float [[X_I2]], 3.000000e+00, !fpmath !9 -; CHECK-NEXT: [[X_I3:%.*]] = extractelement <4 x float> [[X]], i32 3 +; CHECK-NEXT: [[X_I3:%.*]] = extractelement <4 x float> [[X]], i64 3 ; CHECK-NEXT: [[RES_I3:%.*]] = fadd float [[X_I3]], 4.000000e+00, !fpmath !9 -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <4 x float> poison, float [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES_UPTO1:%.*]] = insertelement <4 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 -; CHECK-NEXT: [[RES_UPTO2:%.*]] = insertelement <4 x float> [[RES_UPTO1]], float [[RES_I2]], i32 2 -; CHECK-NEXT: [[RES:%.*]] = insertelement <4 x float> [[RES_UPTO2]], float [[RES_I3]], i32 3 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <4 x float> poison, float [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES_UPTO1:%.*]] = insertelement <4 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1 +; CHECK-NEXT: [[RES_UPTO2:%.*]] = insertelement <4 x float> [[RES_UPTO1]], float [[RES_I2]], i64 2 +; CHECK-NEXT: [[RES:%.*]] = insertelement <4 x float> [[RES_UPTO2]], float [[RES_I3]], i64 3 ; CHECK-NEXT: ret <4 x float> [[RES]] ; %res = fadd <4 x float> %x, , @@ -336,11 +336,11 @@ ; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3 -; CHECK-NEXT: [[PTR0_I0:%.*]] = extractelement <4 x ptr> [[PTR0:%.*]], i32 0 -; CHECK-NEXT: [[PTR0_I2:%.*]] = extractelement <4 x ptr> [[PTR0]], i32 2 -; CHECK-NEXT: [[PTR0_I3:%.*]] = extractelement <4 x ptr> [[PTR0]], i32 3 -; CHECK-NEXT: [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i32 1 -; CHECK-NEXT: [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i32 3 +; CHECK-NEXT: [[PTR0_I0:%.*]] = extractelement <4 x ptr> [[PTR0:%.*]], i64 0 +; CHECK-NEXT: [[PTR0_I2:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 2 +; CHECK-NEXT: [[PTR0_I3:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 3 +; CHECK-NEXT: [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i64 1 +; CHECK-NEXT: [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i64 3 ; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr float, ptr [[PTR0_I0]], i32 100 ; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr float, ptr [[OTHER:%.*]], i32 [[I0_I1]] ; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr float, ptr [[PTR0_I2]], i32 100 @@ -413,135 +413,135 @@ ; CHECK-LABEL: @f11( ; CHECK-NEXT: [[SRC1:%.*]] = getelementptr <32 x i1>, ptr [[SRC0:%.*]], i32 1 ; CHECK-NEXT: [[VAL0:%.*]] = load <32 x i1>, ptr [[SRC0]], align 4 -; CHECK-NEXT: [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i32 0 -; CHECK-NEXT: [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i32 1 -; CHECK-NEXT: [[VAL0_I2:%.*]] = extractelement <32 x i1> [[VAL0]], i32 2 -; CHECK-NEXT: [[VAL0_I3:%.*]] = extractelement <32 x i1> [[VAL0]], i32 3 -; CHECK-NEXT: [[VAL0_I4:%.*]] = extractelement <32 x i1> [[VAL0]], i32 4 -; CHECK-NEXT: [[VAL0_I5:%.*]] = extractelement <32 x i1> [[VAL0]], i32 5 -; CHECK-NEXT: [[VAL0_I6:%.*]] = extractelement <32 x i1> [[VAL0]], i32 6 -; CHECK-NEXT: [[VAL0_I7:%.*]] = extractelement <32 x i1> [[VAL0]], i32 7 -; CHECK-NEXT: [[VAL0_I8:%.*]] = extractelement <32 x i1> [[VAL0]], i32 8 -; CHECK-NEXT: [[VAL0_I9:%.*]] = extractelement <32 x i1> [[VAL0]], i32 9 -; CHECK-NEXT: [[VAL0_I10:%.*]] = extractelement <32 x i1> [[VAL0]], i32 10 -; CHECK-NEXT: [[VAL0_I11:%.*]] = extractelement <32 x i1> [[VAL0]], i32 11 -; CHECK-NEXT: [[VAL0_I12:%.*]] = extractelement <32 x i1> [[VAL0]], i32 12 -; CHECK-NEXT: [[VAL0_I13:%.*]] = extractelement <32 x i1> [[VAL0]], i32 13 -; CHECK-NEXT: [[VAL0_I14:%.*]] = extractelement <32 x i1> [[VAL0]], i32 14 -; CHECK-NEXT: [[VAL0_I15:%.*]] = extractelement <32 x i1> [[VAL0]], i32 15 -; CHECK-NEXT: [[VAL0_I16:%.*]] = extractelement <32 x i1> [[VAL0]], i32 16 -; CHECK-NEXT: [[VAL0_I17:%.*]] = extractelement <32 x i1> [[VAL0]], i32 17 -; CHECK-NEXT: [[VAL0_I18:%.*]] = extractelement <32 x i1> [[VAL0]], i32 18 -; CHECK-NEXT: [[VAL0_I19:%.*]] = extractelement <32 x i1> [[VAL0]], i32 19 -; CHECK-NEXT: [[VAL0_I20:%.*]] = extractelement <32 x i1> [[VAL0]], i32 20 -; CHECK-NEXT: [[VAL0_I21:%.*]] = extractelement <32 x i1> [[VAL0]], i32 21 -; CHECK-NEXT: [[VAL0_I22:%.*]] = extractelement <32 x i1> [[VAL0]], i32 22 -; CHECK-NEXT: [[VAL0_I23:%.*]] = extractelement <32 x i1> [[VAL0]], i32 23 -; CHECK-NEXT: [[VAL0_I24:%.*]] = extractelement <32 x i1> [[VAL0]], i32 24 -; CHECK-NEXT: [[VAL0_I25:%.*]] = extractelement <32 x i1> [[VAL0]], i32 25 -; CHECK-NEXT: [[VAL0_I26:%.*]] = extractelement <32 x i1> [[VAL0]], i32 26 -; CHECK-NEXT: [[VAL0_I27:%.*]] = extractelement <32 x i1> [[VAL0]], i32 27 -; CHECK-NEXT: [[VAL0_I28:%.*]] = extractelement <32 x i1> [[VAL0]], i32 28 -; CHECK-NEXT: [[VAL0_I29:%.*]] = extractelement <32 x i1> [[VAL0]], i32 29 -; CHECK-NEXT: [[VAL0_I30:%.*]] = extractelement <32 x i1> [[VAL0]], i32 30 -; CHECK-NEXT: [[VAL0_I31:%.*]] = extractelement <32 x i1> [[VAL0]], i32 31 +; CHECK-NEXT: [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i64 0 +; CHECK-NEXT: [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i64 1 +; CHECK-NEXT: [[VAL0_I2:%.*]] = extractelement <32 x i1> [[VAL0]], i64 2 +; CHECK-NEXT: [[VAL0_I3:%.*]] = extractelement <32 x i1> [[VAL0]], i64 3 +; CHECK-NEXT: [[VAL0_I4:%.*]] = extractelement <32 x i1> [[VAL0]], i64 4 +; CHECK-NEXT: [[VAL0_I5:%.*]] = extractelement <32 x i1> [[VAL0]], i64 5 +; CHECK-NEXT: [[VAL0_I6:%.*]] = extractelement <32 x i1> [[VAL0]], i64 6 +; CHECK-NEXT: [[VAL0_I7:%.*]] = extractelement <32 x i1> [[VAL0]], i64 7 +; CHECK-NEXT: [[VAL0_I8:%.*]] = extractelement <32 x i1> [[VAL0]], i64 8 +; CHECK-NEXT: [[VAL0_I9:%.*]] = extractelement <32 x i1> [[VAL0]], i64 9 +; CHECK-NEXT: [[VAL0_I10:%.*]] = extractelement <32 x i1> [[VAL0]], i64 10 +; CHECK-NEXT: [[VAL0_I11:%.*]] = extractelement <32 x i1> [[VAL0]], i64 11 +; CHECK-NEXT: [[VAL0_I12:%.*]] = extractelement <32 x i1> [[VAL0]], i64 12 +; CHECK-NEXT: [[VAL0_I13:%.*]] = extractelement <32 x i1> [[VAL0]], i64 13 +; CHECK-NEXT: [[VAL0_I14:%.*]] = extractelement <32 x i1> [[VAL0]], i64 14 +; CHECK-NEXT: [[VAL0_I15:%.*]] = extractelement <32 x i1> [[VAL0]], i64 15 +; CHECK-NEXT: [[VAL0_I16:%.*]] = extractelement <32 x i1> [[VAL0]], i64 16 +; CHECK-NEXT: [[VAL0_I17:%.*]] = extractelement <32 x i1> [[VAL0]], i64 17 +; CHECK-NEXT: [[VAL0_I18:%.*]] = extractelement <32 x i1> [[VAL0]], i64 18 +; CHECK-NEXT: [[VAL0_I19:%.*]] = extractelement <32 x i1> [[VAL0]], i64 19 +; CHECK-NEXT: [[VAL0_I20:%.*]] = extractelement <32 x i1> [[VAL0]], i64 20 +; CHECK-NEXT: [[VAL0_I21:%.*]] = extractelement <32 x i1> [[VAL0]], i64 21 +; CHECK-NEXT: [[VAL0_I22:%.*]] = extractelement <32 x i1> [[VAL0]], i64 22 +; CHECK-NEXT: [[VAL0_I23:%.*]] = extractelement <32 x i1> [[VAL0]], i64 23 +; CHECK-NEXT: [[VAL0_I24:%.*]] = extractelement <32 x i1> [[VAL0]], i64 24 +; CHECK-NEXT: [[VAL0_I25:%.*]] = extractelement <32 x i1> [[VAL0]], i64 25 +; CHECK-NEXT: [[VAL0_I26:%.*]] = extractelement <32 x i1> [[VAL0]], i64 26 +; CHECK-NEXT: [[VAL0_I27:%.*]] = extractelement <32 x i1> [[VAL0]], i64 27 +; CHECK-NEXT: [[VAL0_I28:%.*]] = extractelement <32 x i1> [[VAL0]], i64 28 +; CHECK-NEXT: [[VAL0_I29:%.*]] = extractelement <32 x i1> [[VAL0]], i64 29 +; CHECK-NEXT: [[VAL0_I30:%.*]] = extractelement <32 x i1> [[VAL0]], i64 30 +; CHECK-NEXT: [[VAL0_I31:%.*]] = extractelement <32 x i1> [[VAL0]], i64 31 ; CHECK-NEXT: [[VAL1:%.*]] = load <32 x i1>, ptr [[SRC1]], align 4 -; CHECK-NEXT: [[VAL1_I0:%.*]] = extractelement <32 x i1> [[VAL1]], i32 0 +; CHECK-NEXT: [[VAL1_I0:%.*]] = extractelement <32 x i1> [[VAL1]], i64 0 ; CHECK-NEXT: [[AND_I0:%.*]] = and i1 [[VAL0_I0]], [[VAL1_I0]] -; CHECK-NEXT: [[VAL1_I1:%.*]] = extractelement <32 x i1> [[VAL1]], i32 1 +; CHECK-NEXT: [[VAL1_I1:%.*]] = extractelement <32 x i1> [[VAL1]], i64 1 ; CHECK-NEXT: [[AND_I1:%.*]] = and i1 [[VAL0_I1]], [[VAL1_I1]] -; CHECK-NEXT: [[VAL1_I2:%.*]] = extractelement <32 x i1> [[VAL1]], i32 2 +; CHECK-NEXT: [[VAL1_I2:%.*]] = extractelement <32 x i1> [[VAL1]], i64 2 ; CHECK-NEXT: [[AND_I2:%.*]] = and i1 [[VAL0_I2]], [[VAL1_I2]] -; CHECK-NEXT: [[VAL1_I3:%.*]] = extractelement <32 x i1> [[VAL1]], i32 3 +; CHECK-NEXT: [[VAL1_I3:%.*]] = extractelement <32 x i1> [[VAL1]], i64 3 ; CHECK-NEXT: [[AND_I3:%.*]] = and i1 [[VAL0_I3]], [[VAL1_I3]] -; CHECK-NEXT: [[VAL1_I4:%.*]] = extractelement <32 x i1> [[VAL1]], i32 4 +; CHECK-NEXT: [[VAL1_I4:%.*]] = extractelement <32 x i1> [[VAL1]], i64 4 ; CHECK-NEXT: [[AND_I4:%.*]] = and i1 [[VAL0_I4]], [[VAL1_I4]] -; CHECK-NEXT: [[VAL1_I5:%.*]] = extractelement <32 x i1> [[VAL1]], i32 5 +; CHECK-NEXT: [[VAL1_I5:%.*]] = extractelement <32 x i1> [[VAL1]], i64 5 ; CHECK-NEXT: [[AND_I5:%.*]] = and i1 [[VAL0_I5]], [[VAL1_I5]] -; CHECK-NEXT: [[VAL1_I6:%.*]] = extractelement <32 x i1> [[VAL1]], i32 6 +; CHECK-NEXT: [[VAL1_I6:%.*]] = extractelement <32 x i1> [[VAL1]], i64 6 ; CHECK-NEXT: [[AND_I6:%.*]] = and i1 [[VAL0_I6]], [[VAL1_I6]] -; CHECK-NEXT: [[VAL1_I7:%.*]] = extractelement <32 x i1> [[VAL1]], i32 7 +; CHECK-NEXT: [[VAL1_I7:%.*]] = extractelement <32 x i1> [[VAL1]], i64 7 ; CHECK-NEXT: [[AND_I7:%.*]] = and i1 [[VAL0_I7]], [[VAL1_I7]] -; CHECK-NEXT: [[VAL1_I8:%.*]] = extractelement <32 x i1> [[VAL1]], i32 8 +; CHECK-NEXT: [[VAL1_I8:%.*]] = extractelement <32 x i1> [[VAL1]], i64 8 ; CHECK-NEXT: [[AND_I8:%.*]] = and i1 [[VAL0_I8]], [[VAL1_I8]] -; CHECK-NEXT: [[VAL1_I9:%.*]] = extractelement <32 x i1> [[VAL1]], i32 9 +; CHECK-NEXT: [[VAL1_I9:%.*]] = extractelement <32 x i1> [[VAL1]], i64 9 ; CHECK-NEXT: [[AND_I9:%.*]] = and i1 [[VAL0_I9]], [[VAL1_I9]] -; CHECK-NEXT: [[VAL1_I10:%.*]] = extractelement <32 x i1> [[VAL1]], i32 10 +; CHECK-NEXT: [[VAL1_I10:%.*]] = extractelement <32 x i1> [[VAL1]], i64 10 ; CHECK-NEXT: [[AND_I10:%.*]] = and i1 [[VAL0_I10]], [[VAL1_I10]] -; CHECK-NEXT: [[VAL1_I11:%.*]] = extractelement <32 x i1> [[VAL1]], i32 11 +; CHECK-NEXT: [[VAL1_I11:%.*]] = extractelement <32 x i1> [[VAL1]], i64 11 ; CHECK-NEXT: [[AND_I11:%.*]] = and i1 [[VAL0_I11]], [[VAL1_I11]] -; CHECK-NEXT: [[VAL1_I12:%.*]] = extractelement <32 x i1> [[VAL1]], i32 12 +; CHECK-NEXT: [[VAL1_I12:%.*]] = extractelement <32 x i1> [[VAL1]], i64 12 ; CHECK-NEXT: [[AND_I12:%.*]] = and i1 [[VAL0_I12]], [[VAL1_I12]] -; CHECK-NEXT: [[VAL1_I13:%.*]] = extractelement <32 x i1> [[VAL1]], i32 13 +; CHECK-NEXT: [[VAL1_I13:%.*]] = extractelement <32 x i1> [[VAL1]], i64 13 ; CHECK-NEXT: [[AND_I13:%.*]] = and i1 [[VAL0_I13]], [[VAL1_I13]] -; CHECK-NEXT: [[VAL1_I14:%.*]] = extractelement <32 x i1> [[VAL1]], i32 14 +; CHECK-NEXT: [[VAL1_I14:%.*]] = extractelement <32 x i1> [[VAL1]], i64 14 ; CHECK-NEXT: [[AND_I14:%.*]] = and i1 [[VAL0_I14]], [[VAL1_I14]] -; CHECK-NEXT: [[VAL1_I15:%.*]] = extractelement <32 x i1> [[VAL1]], i32 15 +; CHECK-NEXT: [[VAL1_I15:%.*]] = extractelement <32 x i1> [[VAL1]], i64 15 ; CHECK-NEXT: [[AND_I15:%.*]] = and i1 [[VAL0_I15]], [[VAL1_I15]] -; CHECK-NEXT: [[VAL1_I16:%.*]] = extractelement <32 x i1> [[VAL1]], i32 16 +; CHECK-NEXT: [[VAL1_I16:%.*]] = extractelement <32 x i1> [[VAL1]], i64 16 ; CHECK-NEXT: [[AND_I16:%.*]] = and i1 [[VAL0_I16]], [[VAL1_I16]] -; CHECK-NEXT: [[VAL1_I17:%.*]] = extractelement <32 x i1> [[VAL1]], i32 17 +; CHECK-NEXT: [[VAL1_I17:%.*]] = extractelement <32 x i1> [[VAL1]], i64 17 ; CHECK-NEXT: [[AND_I17:%.*]] = and i1 [[VAL0_I17]], [[VAL1_I17]] -; CHECK-NEXT: [[VAL1_I18:%.*]] = extractelement <32 x i1> [[VAL1]], i32 18 +; CHECK-NEXT: [[VAL1_I18:%.*]] = extractelement <32 x i1> [[VAL1]], i64 18 ; CHECK-NEXT: [[AND_I18:%.*]] = and i1 [[VAL0_I18]], [[VAL1_I18]] -; CHECK-NEXT: [[VAL1_I19:%.*]] = extractelement <32 x i1> [[VAL1]], i32 19 +; CHECK-NEXT: [[VAL1_I19:%.*]] = extractelement <32 x i1> [[VAL1]], i64 19 ; CHECK-NEXT: [[AND_I19:%.*]] = and i1 [[VAL0_I19]], [[VAL1_I19]] -; CHECK-NEXT: [[VAL1_I20:%.*]] = extractelement <32 x i1> [[VAL1]], i32 20 +; CHECK-NEXT: [[VAL1_I20:%.*]] = extractelement <32 x i1> [[VAL1]], i64 20 ; CHECK-NEXT: [[AND_I20:%.*]] = and i1 [[VAL0_I20]], [[VAL1_I20]] -; CHECK-NEXT: [[VAL1_I21:%.*]] = extractelement <32 x i1> [[VAL1]], i32 21 +; CHECK-NEXT: [[VAL1_I21:%.*]] = extractelement <32 x i1> [[VAL1]], i64 21 ; CHECK-NEXT: [[AND_I21:%.*]] = and i1 [[VAL0_I21]], [[VAL1_I21]] -; CHECK-NEXT: [[VAL1_I22:%.*]] = extractelement <32 x i1> [[VAL1]], i32 22 +; CHECK-NEXT: [[VAL1_I22:%.*]] = extractelement <32 x i1> [[VAL1]], i64 22 ; CHECK-NEXT: [[AND_I22:%.*]] = and i1 [[VAL0_I22]], [[VAL1_I22]] -; CHECK-NEXT: [[VAL1_I23:%.*]] = extractelement <32 x i1> [[VAL1]], i32 23 +; CHECK-NEXT: [[VAL1_I23:%.*]] = extractelement <32 x i1> [[VAL1]], i64 23 ; CHECK-NEXT: [[AND_I23:%.*]] = and i1 [[VAL0_I23]], [[VAL1_I23]] -; CHECK-NEXT: [[VAL1_I24:%.*]] = extractelement <32 x i1> [[VAL1]], i32 24 +; CHECK-NEXT: [[VAL1_I24:%.*]] = extractelement <32 x i1> [[VAL1]], i64 24 ; CHECK-NEXT: [[AND_I24:%.*]] = and i1 [[VAL0_I24]], [[VAL1_I24]] -; CHECK-NEXT: [[VAL1_I25:%.*]] = extractelement <32 x i1> [[VAL1]], i32 25 +; CHECK-NEXT: [[VAL1_I25:%.*]] = extractelement <32 x i1> [[VAL1]], i64 25 ; CHECK-NEXT: [[AND_I25:%.*]] = and i1 [[VAL0_I25]], [[VAL1_I25]] -; CHECK-NEXT: [[VAL1_I26:%.*]] = extractelement <32 x i1> [[VAL1]], i32 26 +; CHECK-NEXT: [[VAL1_I26:%.*]] = extractelement <32 x i1> [[VAL1]], i64 26 ; CHECK-NEXT: [[AND_I26:%.*]] = and i1 [[VAL0_I26]], [[VAL1_I26]] -; CHECK-NEXT: [[VAL1_I27:%.*]] = extractelement <32 x i1> [[VAL1]], i32 27 +; CHECK-NEXT: [[VAL1_I27:%.*]] = extractelement <32 x i1> [[VAL1]], i64 27 ; CHECK-NEXT: [[AND_I27:%.*]] = and i1 [[VAL0_I27]], [[VAL1_I27]] -; CHECK-NEXT: [[VAL1_I28:%.*]] = extractelement <32 x i1> [[VAL1]], i32 28 +; CHECK-NEXT: [[VAL1_I28:%.*]] = extractelement <32 x i1> [[VAL1]], i64 28 ; CHECK-NEXT: [[AND_I28:%.*]] = and i1 [[VAL0_I28]], [[VAL1_I28]] -; CHECK-NEXT: [[VAL1_I29:%.*]] = extractelement <32 x i1> [[VAL1]], i32 29 +; CHECK-NEXT: [[VAL1_I29:%.*]] = extractelement <32 x i1> [[VAL1]], i64 29 ; CHECK-NEXT: [[AND_I29:%.*]] = and i1 [[VAL0_I29]], [[VAL1_I29]] -; CHECK-NEXT: [[VAL1_I30:%.*]] = extractelement <32 x i1> [[VAL1]], i32 30 +; CHECK-NEXT: [[VAL1_I30:%.*]] = extractelement <32 x i1> [[VAL1]], i64 30 ; CHECK-NEXT: [[AND_I30:%.*]] = and i1 [[VAL0_I30]], [[VAL1_I30]] -; CHECK-NEXT: [[VAL1_I31:%.*]] = extractelement <32 x i1> [[VAL1]], i32 31 +; CHECK-NEXT: [[VAL1_I31:%.*]] = extractelement <32 x i1> [[VAL1]], i64 31 ; CHECK-NEXT: [[AND_I31:%.*]] = and i1 [[VAL0_I31]], [[VAL1_I31]] -; CHECK-NEXT: [[AND_UPTO0:%.*]] = insertelement <32 x i1> poison, i1 [[AND_I0]], i32 0 -; CHECK-NEXT: [[AND_UPTO1:%.*]] = insertelement <32 x i1> [[AND_UPTO0]], i1 [[AND_I1]], i32 1 -; CHECK-NEXT: [[AND_UPTO2:%.*]] = insertelement <32 x i1> [[AND_UPTO1]], i1 [[AND_I2]], i32 2 -; CHECK-NEXT: [[AND_UPTO3:%.*]] = insertelement <32 x i1> [[AND_UPTO2]], i1 [[AND_I3]], i32 3 -; CHECK-NEXT: [[AND_UPTO4:%.*]] = insertelement <32 x i1> [[AND_UPTO3]], i1 [[AND_I4]], i32 4 -; CHECK-NEXT: [[AND_UPTO5:%.*]] = insertelement <32 x i1> [[AND_UPTO4]], i1 [[AND_I5]], i32 5 -; CHECK-NEXT: [[AND_UPTO6:%.*]] = insertelement <32 x i1> [[AND_UPTO5]], i1 [[AND_I6]], i32 6 -; CHECK-NEXT: [[AND_UPTO7:%.*]] = insertelement <32 x i1> [[AND_UPTO6]], i1 [[AND_I7]], i32 7 -; CHECK-NEXT: [[AND_UPTO8:%.*]] = insertelement <32 x i1> [[AND_UPTO7]], i1 [[AND_I8]], i32 8 -; CHECK-NEXT: [[AND_UPTO9:%.*]] = insertelement <32 x i1> [[AND_UPTO8]], i1 [[AND_I9]], i32 9 -; CHECK-NEXT: [[AND_UPTO10:%.*]] = insertelement <32 x i1> [[AND_UPTO9]], i1 [[AND_I10]], i32 10 -; CHECK-NEXT: [[AND_UPTO11:%.*]] = insertelement <32 x i1> [[AND_UPTO10]], i1 [[AND_I11]], i32 11 -; CHECK-NEXT: [[AND_UPTO12:%.*]] = insertelement <32 x i1> [[AND_UPTO11]], i1 [[AND_I12]], i32 12 -; CHECK-NEXT: [[AND_UPTO13:%.*]] = insertelement <32 x i1> [[AND_UPTO12]], i1 [[AND_I13]], i32 13 -; CHECK-NEXT: [[AND_UPTO14:%.*]] = insertelement <32 x i1> [[AND_UPTO13]], i1 [[AND_I14]], i32 14 -; CHECK-NEXT: [[AND_UPTO15:%.*]] = insertelement <32 x i1> [[AND_UPTO14]], i1 [[AND_I15]], i32 15 -; CHECK-NEXT: [[AND_UPTO16:%.*]] = insertelement <32 x i1> [[AND_UPTO15]], i1 [[AND_I16]], i32 16 -; CHECK-NEXT: [[AND_UPTO17:%.*]] = insertelement <32 x i1> [[AND_UPTO16]], i1 [[AND_I17]], i32 17 -; CHECK-NEXT: [[AND_UPTO18:%.*]] = insertelement <32 x i1> [[AND_UPTO17]], i1 [[AND_I18]], i32 18 -; CHECK-NEXT: [[AND_UPTO19:%.*]] = insertelement <32 x i1> [[AND_UPTO18]], i1 [[AND_I19]], i32 19 -; CHECK-NEXT: [[AND_UPTO20:%.*]] = insertelement <32 x i1> [[AND_UPTO19]], i1 [[AND_I20]], i32 20 -; CHECK-NEXT: [[AND_UPTO21:%.*]] = insertelement <32 x i1> [[AND_UPTO20]], i1 [[AND_I21]], i32 21 -; CHECK-NEXT: [[AND_UPTO22:%.*]] = insertelement <32 x i1> [[AND_UPTO21]], i1 [[AND_I22]], i32 22 -; CHECK-NEXT: [[AND_UPTO23:%.*]] = insertelement <32 x i1> [[AND_UPTO22]], i1 [[AND_I23]], i32 23 -; CHECK-NEXT: [[AND_UPTO24:%.*]] = insertelement <32 x i1> [[AND_UPTO23]], i1 [[AND_I24]], i32 24 -; CHECK-NEXT: [[AND_UPTO25:%.*]] = insertelement <32 x i1> [[AND_UPTO24]], i1 [[AND_I25]], i32 25 -; CHECK-NEXT: [[AND_UPTO26:%.*]] = insertelement <32 x i1> [[AND_UPTO25]], i1 [[AND_I26]], i32 26 -; CHECK-NEXT: [[AND_UPTO27:%.*]] = insertelement <32 x i1> [[AND_UPTO26]], i1 [[AND_I27]], i32 27 -; CHECK-NEXT: [[AND_UPTO28:%.*]] = insertelement <32 x i1> [[AND_UPTO27]], i1 [[AND_I28]], i32 28 -; CHECK-NEXT: [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i32 29 -; CHECK-NEXT: [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i32 30 -; CHECK-NEXT: [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i32 31 +; CHECK-NEXT: [[AND_UPTO0:%.*]] = insertelement <32 x i1> poison, i1 [[AND_I0]], i64 0 +; CHECK-NEXT: [[AND_UPTO1:%.*]] = insertelement <32 x i1> [[AND_UPTO0]], i1 [[AND_I1]], i64 1 +; CHECK-NEXT: [[AND_UPTO2:%.*]] = insertelement <32 x i1> [[AND_UPTO1]], i1 [[AND_I2]], i64 2 +; CHECK-NEXT: [[AND_UPTO3:%.*]] = insertelement <32 x i1> [[AND_UPTO2]], i1 [[AND_I3]], i64 3 +; CHECK-NEXT: [[AND_UPTO4:%.*]] = insertelement <32 x i1> [[AND_UPTO3]], i1 [[AND_I4]], i64 4 +; CHECK-NEXT: [[AND_UPTO5:%.*]] = insertelement <32 x i1> [[AND_UPTO4]], i1 [[AND_I5]], i64 5 +; CHECK-NEXT: [[AND_UPTO6:%.*]] = insertelement <32 x i1> [[AND_UPTO5]], i1 [[AND_I6]], i64 6 +; CHECK-NEXT: [[AND_UPTO7:%.*]] = insertelement <32 x i1> [[AND_UPTO6]], i1 [[AND_I7]], i64 7 +; CHECK-NEXT: [[AND_UPTO8:%.*]] = insertelement <32 x i1> [[AND_UPTO7]], i1 [[AND_I8]], i64 8 +; CHECK-NEXT: [[AND_UPTO9:%.*]] = insertelement <32 x i1> [[AND_UPTO8]], i1 [[AND_I9]], i64 9 +; CHECK-NEXT: [[AND_UPTO10:%.*]] = insertelement <32 x i1> [[AND_UPTO9]], i1 [[AND_I10]], i64 10 +; CHECK-NEXT: [[AND_UPTO11:%.*]] = insertelement <32 x i1> [[AND_UPTO10]], i1 [[AND_I11]], i64 11 +; CHECK-NEXT: [[AND_UPTO12:%.*]] = insertelement <32 x i1> [[AND_UPTO11]], i1 [[AND_I12]], i64 12 +; CHECK-NEXT: [[AND_UPTO13:%.*]] = insertelement <32 x i1> [[AND_UPTO12]], i1 [[AND_I13]], i64 13 +; CHECK-NEXT: [[AND_UPTO14:%.*]] = insertelement <32 x i1> [[AND_UPTO13]], i1 [[AND_I14]], i64 14 +; CHECK-NEXT: [[AND_UPTO15:%.*]] = insertelement <32 x i1> [[AND_UPTO14]], i1 [[AND_I15]], i64 15 +; CHECK-NEXT: [[AND_UPTO16:%.*]] = insertelement <32 x i1> [[AND_UPTO15]], i1 [[AND_I16]], i64 16 +; CHECK-NEXT: [[AND_UPTO17:%.*]] = insertelement <32 x i1> [[AND_UPTO16]], i1 [[AND_I17]], i64 17 +; CHECK-NEXT: [[AND_UPTO18:%.*]] = insertelement <32 x i1> [[AND_UPTO17]], i1 [[AND_I18]], i64 18 +; CHECK-NEXT: [[AND_UPTO19:%.*]] = insertelement <32 x i1> [[AND_UPTO18]], i1 [[AND_I19]], i64 19 +; CHECK-NEXT: [[AND_UPTO20:%.*]] = insertelement <32 x i1> [[AND_UPTO19]], i1 [[AND_I20]], i64 20 +; CHECK-NEXT: [[AND_UPTO21:%.*]] = insertelement <32 x i1> [[AND_UPTO20]], i1 [[AND_I21]], i64 21 +; CHECK-NEXT: [[AND_UPTO22:%.*]] = insertelement <32 x i1> [[AND_UPTO21]], i1 [[AND_I22]], i64 22 +; CHECK-NEXT: [[AND_UPTO23:%.*]] = insertelement <32 x i1> [[AND_UPTO22]], i1 [[AND_I23]], i64 23 +; CHECK-NEXT: [[AND_UPTO24:%.*]] = insertelement <32 x i1> [[AND_UPTO23]], i1 [[AND_I24]], i64 24 +; CHECK-NEXT: [[AND_UPTO25:%.*]] = insertelement <32 x i1> [[AND_UPTO24]], i1 [[AND_I25]], i64 25 +; CHECK-NEXT: [[AND_UPTO26:%.*]] = insertelement <32 x i1> [[AND_UPTO25]], i1 [[AND_I26]], i64 26 +; CHECK-NEXT: [[AND_UPTO27:%.*]] = insertelement <32 x i1> [[AND_UPTO26]], i1 [[AND_I27]], i64 27 +; CHECK-NEXT: [[AND_UPTO28:%.*]] = insertelement <32 x i1> [[AND_UPTO27]], i1 [[AND_I28]], i64 28 +; CHECK-NEXT: [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i64 29 +; CHECK-NEXT: [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i64 30 +; CHECK-NEXT: [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i64 31 ; CHECK-NEXT: store <32 x i1> [[AND]], ptr [[DEST:%.*]], align 4 ; CHECK-NEXT: ret void ; @@ -559,17 +559,17 @@ ; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3 -; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i32 0 -; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x ptr> [[PTR:%.*]], i32 0 +; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x ptr> [[PTR:%.*]], i64 0 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i64 0 ; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I0]], i32 0, i32 [[I_I0]] -; CHECK-NEXT: [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i32 1 -; CHECK-NEXT: [[PTR_I1:%.*]] = extractelement <4 x ptr> [[PTR]], i32 1 +; CHECK-NEXT: [[PTR_I1:%.*]] = extractelement <4 x ptr> [[PTR]], i64 1 +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i64 1 ; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I1]], i32 1, i32 [[I_I1]] -; CHECK-NEXT: [[I_I2:%.*]] = extractelement <4 x i32> [[I]], i32 2 -; CHECK-NEXT: [[PTR_I2:%.*]] = extractelement <4 x ptr> [[PTR]], i32 2 +; CHECK-NEXT: [[PTR_I2:%.*]] = extractelement <4 x ptr> [[PTR]], i64 2 +; CHECK-NEXT: [[I_I2:%.*]] = extractelement <4 x i32> [[I]], i64 2 ; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I2]], i32 2, i32 [[I_I2]] -; CHECK-NEXT: [[I_I3:%.*]] = extractelement <4 x i32> [[I]], i32 3 -; CHECK-NEXT: [[PTR_I3:%.*]] = extractelement <4 x ptr> [[PTR]], i32 3 +; CHECK-NEXT: [[PTR_I3:%.*]] = extractelement <4 x ptr> [[PTR]], i64 3 +; CHECK-NEXT: [[I_I3:%.*]] = extractelement <4 x i32> [[I]], i64 3 ; CHECK-NEXT: [[VAL_I3:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I3]], i32 3, i32 [[I_I3]] ; CHECK-NEXT: store ptr [[VAL_I0]], ptr [[DEST]], align 32 ; CHECK-NEXT: store ptr [[VAL_I1]], ptr [[DEST_I1]], align 8 @@ -589,10 +589,10 @@ define <4 x float> @f14(<4 x float> %acc, i32 %count) { ; CHECK-LABEL: @f14( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i32 0 -; CHECK-NEXT: [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i32 1 -; CHECK-NEXT: [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i32 2 -; CHECK-NEXT: [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i32 3 +; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i64 0 +; CHECK-NEXT: [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i64 1 +; CHECK-NEXT: [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i64 2 +; CHECK-NEXT: [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i64 3 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], [[ENTRY:%.*]] ], [ [[NEXT_ACC_I0:%.*]], [[LOOP]] ] @@ -600,23 +600,23 @@ ; CHECK-NEXT: [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], [[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], [[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[NEXT_COUNT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i32 0 -; CHECK-NEXT: [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float [[THIS_ACC_I1]], i32 1 -; CHECK-NEXT: [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i32 2 -; CHECK-NEXT: [[THIS_ACC:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO2]], float [[THIS_ACC_I3]], i32 3 +; CHECK-NEXT: [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i64 0 +; CHECK-NEXT: [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float [[THIS_ACC_I1]], i64 1 +; CHECK-NEXT: [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i64 2 +; CHECK-NEXT: [[THIS_ACC:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO2]], float [[THIS_ACC_I3]], i64 3 ; CHECK-NEXT: [[FOO:%.*]] = call <4 x float> @ext(<4 x float> [[THIS_ACC]]) -; CHECK-NEXT: [[FOO_I0:%.*]] = extractelement <4 x float> [[FOO]], i32 0 +; CHECK-NEXT: [[FOO_I0:%.*]] = extractelement <4 x float> [[FOO]], i64 0 ; CHECK-NEXT: [[NEXT_ACC_I0]] = fadd float [[THIS_ACC_I0]], [[FOO_I0]] -; CHECK-NEXT: [[FOO_I1:%.*]] = extractelement <4 x float> [[FOO]], i32 1 +; CHECK-NEXT: [[FOO_I1:%.*]] = extractelement <4 x float> [[FOO]], i64 1 ; CHECK-NEXT: [[NEXT_ACC_I1]] = fadd float [[THIS_ACC_I1]], [[FOO_I1]] -; CHECK-NEXT: [[FOO_I2:%.*]] = extractelement <4 x float> [[FOO]], i32 2 +; CHECK-NEXT: [[FOO_I2:%.*]] = extractelement <4 x float> [[FOO]], i64 2 ; CHECK-NEXT: [[NEXT_ACC_I2]] = fadd float [[THIS_ACC_I2]], [[FOO_I2]] -; CHECK-NEXT: [[FOO_I3:%.*]] = extractelement <4 x float> [[FOO]], i32 3 +; CHECK-NEXT: [[FOO_I3:%.*]] = extractelement <4 x float> [[FOO]], i64 3 ; CHECK-NEXT: [[NEXT_ACC_I3]] = fadd float [[THIS_ACC_I3]], [[FOO_I3]] -; CHECK-NEXT: [[NEXT_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEXT_ACC_I0]], i32 0 -; CHECK-NEXT: [[NEXT_ACC_UPTO1:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO0]], float [[NEXT_ACC_I1]], i32 1 -; CHECK-NEXT: [[NEXT_ACC_UPTO2:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO1]], float [[NEXT_ACC_I2]], i32 2 -; CHECK-NEXT: [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i32 3 +; CHECK-NEXT: [[NEXT_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEXT_ACC_I0]], i64 0 +; CHECK-NEXT: [[NEXT_ACC_UPTO1:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO0]], float [[NEXT_ACC_I1]], i64 1 +; CHECK-NEXT: [[NEXT_ACC_UPTO2:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO1]], float [[NEXT_ACC_I2]], i64 2 +; CHECK-NEXT: [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i64 3 ; CHECK-NEXT: [[NEXT_COUNT]] = sub i32 [[THIS_COUNT]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NEXT_COUNT]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] @@ -659,18 +659,18 @@ ; CHECK-NEXT: [[NEG_I1:%.*]] = fneg float [[VAL_I1]] ; CHECK-NEXT: [[NEG_I2:%.*]] = fneg float [[VAL_I2]] ; CHECK-NEXT: [[NEG_I3:%.*]] = fneg float [[VAL_I3]] -; CHECK-NEXT: [[NEG_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEG_I0]], i32 0 -; CHECK-NEXT: [[NEG_UPTO1:%.*]] = insertelement <4 x float> [[NEG_UPTO0]], float [[NEG_I1]], i32 1 -; CHECK-NEXT: [[NEG_UPTO2:%.*]] = insertelement <4 x float> [[NEG_UPTO1]], float [[NEG_I2]], i32 2 -; CHECK-NEXT: [[NEG:%.*]] = insertelement <4 x float> [[NEG_UPTO2]], float [[NEG_I3]], i32 3 +; CHECK-NEXT: [[NEG_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEG_I0]], i64 0 +; CHECK-NEXT: [[NEG_UPTO1:%.*]] = insertelement <4 x float> [[NEG_UPTO0]], float [[NEG_I1]], i64 1 +; CHECK-NEXT: [[NEG_UPTO2:%.*]] = insertelement <4 x float> [[NEG_UPTO1]], float [[NEG_I2]], i64 2 +; CHECK-NEXT: [[NEG:%.*]] = insertelement <4 x float> [[NEG_UPTO2]], float [[NEG_I3]], i64 3 ; CHECK-NEXT: [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[NEG]]) -; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0 +; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i64 0 ; CHECK-NEXT: [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00 -; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1 +; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i64 1 ; CHECK-NEXT: [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00 -; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2 +; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i64 2 ; CHECK-NEXT: [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00 -; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3 +; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i64 3 ; CHECK-NEXT: [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00 ; CHECK-NEXT: [[SEL_I0:%.*]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00 ; CHECK-NEXT: [[SEL_I1:%.*]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00 @@ -713,14 +713,14 @@ ; Check that IR flags are preserved. define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) { ; CHECK-LABEL: @f16( -; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0 -; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]] -; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1 -; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1 +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1 +; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = add nuw nsw i32 [[I_I1]], [[J_I1]] -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[RES]] ; %res = add nuw nsw <2 x i32> %i, %j @@ -728,14 +728,14 @@ } define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) { ; CHECK-LABEL: @f17( -; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0 -; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = sdiv exact i32 [[I_I0]], [[J_I0]] -; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1 -; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1 +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1 +; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = sdiv exact i32 [[I_I1]], [[J_I1]] -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[RES]] ; %res = sdiv exact <2 x i32> %i, %j @@ -743,14 +743,14 @@ } define <2 x float> @f18(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @f18( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 -; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = fadd fast float [[X_I0]], [[Y_I0]] -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 -; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 +; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = fadd fast float [[X_I1]], [[Y_I1]] -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1 ; CHECK-NEXT: ret <2 x float> [[RES]] ; %res = fadd fast <2 x float> %x, %y @@ -758,12 +758,12 @@ } define <2 x float> @f19(<2 x float> %x) { ; CHECK-LABEL: @f19( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = fneg fast float [[X_I0]] -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = fneg fast float [[X_I1]] -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1 ; CHECK-NEXT: ret <2 x float> [[RES]] ; %res = fneg fast <2 x float> %x @@ -771,14 +771,14 @@ } define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @f20( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 -; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = fcmp fast ogt float [[X_I0]], [[Y_I0]] -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 -; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 +; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = fcmp fast ogt float [[X_I1]], [[Y_I1]] -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i1> [[RES_UPTO0]], i1 [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i1> [[RES_UPTO0]], i1 [[RES_I1]], i64 1 ; CHECK-NEXT: ret <2 x i1> [[RES]] ; %res = fcmp fast ogt <2 x float> %x, %y @@ -787,12 +787,12 @@ declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) define <2 x float> @f21(<2 x float> %x) { ; CHECK-LABEL: @f21( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I0]]) -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I1]]) -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1 ; CHECK-NEXT: ret <2 x float> [[RES]] ; %res = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %x) @@ -801,16 +801,16 @@ declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) { ; CHECK-LABEL: @f22( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 -; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0 -; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 +; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]]) -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 -; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1 -; CHECK-NEXT: [[Z_I1:%.*]] = extractelement <2 x float> [[Z]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 +; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 +; CHECK-NEXT: [[Z_I1:%.*]] = extractelement <2 x float> [[Z]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.fma.f32(float [[X_I1]], float [[Y_I1]], float [[Z_I1]]) -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1 ; CHECK-NEXT: ret <2 x float> [[RES]] ; %res = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) @@ -820,9 +820,9 @@ ; See https://reviews.llvm.org/D83101#2133062 define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) { ; CHECK-LABEL: @f23_crash( -; CHECK-NEXT: [[SRCVEC_I0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i32 0 -; CHECK-NEXT: [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SRCVEC_I0]], i32 0 -; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i32 1 +; CHECK-NEXT: [[SRCVEC_I0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i64 0 +; CHECK-NEXT: [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SRCVEC_I0]], i64 0 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[T1]] ; %v0 = extractelement <2 x i32> %srcvec, i32 0 diff --git a/llvm/test/Transforms/Scalarizer/basic.ll b/llvm/test/Transforms/Scalarizer/basic.ll --- a/llvm/test/Transforms/Scalarizer/basic.ll +++ b/llvm/test/Transforms/Scalarizer/basic.ll @@ -8,10 +8,10 @@ define void @f1(<4 x float> %init, ptr %base, i32 %count) { ; CHECK-LABEL: @f1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i32 0 -; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i32 1 -; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i32 2 -; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i32 3 +; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i64 0 +; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i64 1 +; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i64 2 +; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i64 3 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -32,18 +32,18 @@ ; CHECK-NEXT: [[ADD_I1:%.*]] = fadd float [[VAL_I1]], [[VAL_I3]] ; CHECK-NEXT: [[ADD_I2:%.*]] = fadd float [[ACC_I0]], [[ACC_I2]] ; CHECK-NEXT: [[ADD_I3:%.*]] = fadd float [[ACC_I1]], [[ACC_I3]] -; CHECK-NEXT: [[ADD_UPTO0:%.*]] = insertelement <4 x float> poison, float [[ADD_I0]], i32 0 -; CHECK-NEXT: [[ADD_UPTO1:%.*]] = insertelement <4 x float> [[ADD_UPTO0]], float [[ADD_I1]], i32 1 -; CHECK-NEXT: [[ADD_UPTO2:%.*]] = insertelement <4 x float> [[ADD_UPTO1]], float [[ADD_I2]], i32 2 -; CHECK-NEXT: [[ADD:%.*]] = insertelement <4 x float> [[ADD_UPTO2]], float [[ADD_I3]], i32 3 +; CHECK-NEXT: [[ADD_UPTO0:%.*]] = insertelement <4 x float> poison, float [[ADD_I0]], i64 0 +; CHECK-NEXT: [[ADD_UPTO1:%.*]] = insertelement <4 x float> [[ADD_UPTO0]], float [[ADD_I1]], i64 1 +; CHECK-NEXT: [[ADD_UPTO2:%.*]] = insertelement <4 x float> [[ADD_UPTO1]], float [[ADD_I2]], i64 2 +; CHECK-NEXT: [[ADD:%.*]] = insertelement <4 x float> [[ADD_UPTO2]], float [[ADD_I3]], i64 3 ; CHECK-NEXT: [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[ADD]]) -; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0 +; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i64 0 ; CHECK-NEXT: [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00 -; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1 +; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i64 1 ; CHECK-NEXT: [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00 -; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2 +; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i64 2 ; CHECK-NEXT: [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00 -; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3 +; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i64 3 ; CHECK-NEXT: [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00 ; CHECK-NEXT: [[SEL_I0]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00 ; CHECK-NEXT: [[SEL_I1]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00 @@ -94,10 +94,10 @@ define void @f2(<4 x i32> %init, ptr %base, i32 %count) { ; CHECK-LABEL: @f2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i32 0 -; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i32 1 -; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i32 2 -; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i32 3 +; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i64 0 +; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i64 1 +; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i64 2 +; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i64 3 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -282,18 +282,18 @@ ; Check that fpmath information is preserved. define <4 x float> @f6(<4 x float> %x) { ; CHECK-LABEL: @f6( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath !9 -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <4 x float> [[X]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <4 x float> [[X]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath !9 -; CHECK-NEXT: [[X_I2:%.*]] = extractelement <4 x float> [[X]], i32 2 +; CHECK-NEXT: [[X_I2:%.*]] = extractelement <4 x float> [[X]], i64 2 ; CHECK-NEXT: [[RES_I2:%.*]] = fadd float [[X_I2]], 3.000000e+00, !fpmath !9 -; CHECK-NEXT: [[X_I3:%.*]] = extractelement <4 x float> [[X]], i32 3 +; CHECK-NEXT: [[X_I3:%.*]] = extractelement <4 x float> [[X]], i64 3 ; CHECK-NEXT: [[RES_I3:%.*]] = fadd float [[X_I3]], 4.000000e+00, !fpmath !9 -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <4 x float> poison, float [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES_UPTO1:%.*]] = insertelement <4 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 -; CHECK-NEXT: [[RES_UPTO2:%.*]] = insertelement <4 x float> [[RES_UPTO1]], float [[RES_I2]], i32 2 -; CHECK-NEXT: [[RES:%.*]] = insertelement <4 x float> [[RES_UPTO2]], float [[RES_I3]], i32 3 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <4 x float> poison, float [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES_UPTO1:%.*]] = insertelement <4 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1 +; CHECK-NEXT: [[RES_UPTO2:%.*]] = insertelement <4 x float> [[RES_UPTO1]], float [[RES_I2]], i64 2 +; CHECK-NEXT: [[RES:%.*]] = insertelement <4 x float> [[RES_UPTO2]], float [[RES_I3]], i64 3 ; CHECK-NEXT: ret <4 x float> [[RES]] ; %res = fadd <4 x float> %x, , @@ -336,11 +336,11 @@ ; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3 -; CHECK-NEXT: [[PTR0_I0:%.*]] = extractelement <4 x ptr> [[PTR0:%.*]], i32 0 -; CHECK-NEXT: [[PTR0_I2:%.*]] = extractelement <4 x ptr> [[PTR0]], i32 2 -; CHECK-NEXT: [[PTR0_I3:%.*]] = extractelement <4 x ptr> [[PTR0]], i32 3 -; CHECK-NEXT: [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i32 1 -; CHECK-NEXT: [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i32 3 +; CHECK-NEXT: [[PTR0_I0:%.*]] = extractelement <4 x ptr> [[PTR0:%.*]], i64 0 +; CHECK-NEXT: [[PTR0_I2:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 2 +; CHECK-NEXT: [[PTR0_I3:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 3 +; CHECK-NEXT: [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i64 1 +; CHECK-NEXT: [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i64 3 ; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr float, ptr [[PTR0_I0]], i32 100 ; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr float, ptr [[OTHER:%.*]], i32 [[I0_I1]] ; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr float, ptr [[PTR0_I2]], i32 100 @@ -413,135 +413,135 @@ ; CHECK-LABEL: @f11( ; CHECK-NEXT: [[SRC1:%.*]] = getelementptr <32 x i1>, ptr [[SRC0:%.*]], i32 1 ; CHECK-NEXT: [[VAL0:%.*]] = load <32 x i1>, ptr [[SRC0]], align 4 -; CHECK-NEXT: [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i32 0 -; CHECK-NEXT: [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i32 1 -; CHECK-NEXT: [[VAL0_I2:%.*]] = extractelement <32 x i1> [[VAL0]], i32 2 -; CHECK-NEXT: [[VAL0_I3:%.*]] = extractelement <32 x i1> [[VAL0]], i32 3 -; CHECK-NEXT: [[VAL0_I4:%.*]] = extractelement <32 x i1> [[VAL0]], i32 4 -; CHECK-NEXT: [[VAL0_I5:%.*]] = extractelement <32 x i1> [[VAL0]], i32 5 -; CHECK-NEXT: [[VAL0_I6:%.*]] = extractelement <32 x i1> [[VAL0]], i32 6 -; CHECK-NEXT: [[VAL0_I7:%.*]] = extractelement <32 x i1> [[VAL0]], i32 7 -; CHECK-NEXT: [[VAL0_I8:%.*]] = extractelement <32 x i1> [[VAL0]], i32 8 -; CHECK-NEXT: [[VAL0_I9:%.*]] = extractelement <32 x i1> [[VAL0]], i32 9 -; CHECK-NEXT: [[VAL0_I10:%.*]] = extractelement <32 x i1> [[VAL0]], i32 10 -; CHECK-NEXT: [[VAL0_I11:%.*]] = extractelement <32 x i1> [[VAL0]], i32 11 -; CHECK-NEXT: [[VAL0_I12:%.*]] = extractelement <32 x i1> [[VAL0]], i32 12 -; CHECK-NEXT: [[VAL0_I13:%.*]] = extractelement <32 x i1> [[VAL0]], i32 13 -; CHECK-NEXT: [[VAL0_I14:%.*]] = extractelement <32 x i1> [[VAL0]], i32 14 -; CHECK-NEXT: [[VAL0_I15:%.*]] = extractelement <32 x i1> [[VAL0]], i32 15 -; CHECK-NEXT: [[VAL0_I16:%.*]] = extractelement <32 x i1> [[VAL0]], i32 16 -; CHECK-NEXT: [[VAL0_I17:%.*]] = extractelement <32 x i1> [[VAL0]], i32 17 -; CHECK-NEXT: [[VAL0_I18:%.*]] = extractelement <32 x i1> [[VAL0]], i32 18 -; CHECK-NEXT: [[VAL0_I19:%.*]] = extractelement <32 x i1> [[VAL0]], i32 19 -; CHECK-NEXT: [[VAL0_I20:%.*]] = extractelement <32 x i1> [[VAL0]], i32 20 -; CHECK-NEXT: [[VAL0_I21:%.*]] = extractelement <32 x i1> [[VAL0]], i32 21 -; CHECK-NEXT: [[VAL0_I22:%.*]] = extractelement <32 x i1> [[VAL0]], i32 22 -; CHECK-NEXT: [[VAL0_I23:%.*]] = extractelement <32 x i1> [[VAL0]], i32 23 -; CHECK-NEXT: [[VAL0_I24:%.*]] = extractelement <32 x i1> [[VAL0]], i32 24 -; CHECK-NEXT: [[VAL0_I25:%.*]] = extractelement <32 x i1> [[VAL0]], i32 25 -; CHECK-NEXT: [[VAL0_I26:%.*]] = extractelement <32 x i1> [[VAL0]], i32 26 -; CHECK-NEXT: [[VAL0_I27:%.*]] = extractelement <32 x i1> [[VAL0]], i32 27 -; CHECK-NEXT: [[VAL0_I28:%.*]] = extractelement <32 x i1> [[VAL0]], i32 28 -; CHECK-NEXT: [[VAL0_I29:%.*]] = extractelement <32 x i1> [[VAL0]], i32 29 -; CHECK-NEXT: [[VAL0_I30:%.*]] = extractelement <32 x i1> [[VAL0]], i32 30 -; CHECK-NEXT: [[VAL0_I31:%.*]] = extractelement <32 x i1> [[VAL0]], i32 31 +; CHECK-NEXT: [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i64 0 +; CHECK-NEXT: [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i64 1 +; CHECK-NEXT: [[VAL0_I2:%.*]] = extractelement <32 x i1> [[VAL0]], i64 2 +; CHECK-NEXT: [[VAL0_I3:%.*]] = extractelement <32 x i1> [[VAL0]], i64 3 +; CHECK-NEXT: [[VAL0_I4:%.*]] = extractelement <32 x i1> [[VAL0]], i64 4 +; CHECK-NEXT: [[VAL0_I5:%.*]] = extractelement <32 x i1> [[VAL0]], i64 5 +; CHECK-NEXT: [[VAL0_I6:%.*]] = extractelement <32 x i1> [[VAL0]], i64 6 +; CHECK-NEXT: [[VAL0_I7:%.*]] = extractelement <32 x i1> [[VAL0]], i64 7 +; CHECK-NEXT: [[VAL0_I8:%.*]] = extractelement <32 x i1> [[VAL0]], i64 8 +; CHECK-NEXT: [[VAL0_I9:%.*]] = extractelement <32 x i1> [[VAL0]], i64 9 +; CHECK-NEXT: [[VAL0_I10:%.*]] = extractelement <32 x i1> [[VAL0]], i64 10 +; CHECK-NEXT: [[VAL0_I11:%.*]] = extractelement <32 x i1> [[VAL0]], i64 11 +; CHECK-NEXT: [[VAL0_I12:%.*]] = extractelement <32 x i1> [[VAL0]], i64 12 +; CHECK-NEXT: [[VAL0_I13:%.*]] = extractelement <32 x i1> [[VAL0]], i64 13 +; CHECK-NEXT: [[VAL0_I14:%.*]] = extractelement <32 x i1> [[VAL0]], i64 14 +; CHECK-NEXT: [[VAL0_I15:%.*]] = extractelement <32 x i1> [[VAL0]], i64 15 +; CHECK-NEXT: [[VAL0_I16:%.*]] = extractelement <32 x i1> [[VAL0]], i64 16 +; CHECK-NEXT: [[VAL0_I17:%.*]] = extractelement <32 x i1> [[VAL0]], i64 17 +; CHECK-NEXT: [[VAL0_I18:%.*]] = extractelement <32 x i1> [[VAL0]], i64 18 +; CHECK-NEXT: [[VAL0_I19:%.*]] = extractelement <32 x i1> [[VAL0]], i64 19 +; CHECK-NEXT: [[VAL0_I20:%.*]] = extractelement <32 x i1> [[VAL0]], i64 20 +; CHECK-NEXT: [[VAL0_I21:%.*]] = extractelement <32 x i1> [[VAL0]], i64 21 +; CHECK-NEXT: [[VAL0_I22:%.*]] = extractelement <32 x i1> [[VAL0]], i64 22 +; CHECK-NEXT: [[VAL0_I23:%.*]] = extractelement <32 x i1> [[VAL0]], i64 23 +; CHECK-NEXT: [[VAL0_I24:%.*]] = extractelement <32 x i1> [[VAL0]], i64 24 +; CHECK-NEXT: [[VAL0_I25:%.*]] = extractelement <32 x i1> [[VAL0]], i64 25 +; CHECK-NEXT: [[VAL0_I26:%.*]] = extractelement <32 x i1> [[VAL0]], i64 26 +; CHECK-NEXT: [[VAL0_I27:%.*]] = extractelement <32 x i1> [[VAL0]], i64 27 +; CHECK-NEXT: [[VAL0_I28:%.*]] = extractelement <32 x i1> [[VAL0]], i64 28 +; CHECK-NEXT: [[VAL0_I29:%.*]] = extractelement <32 x i1> [[VAL0]], i64 29 +; CHECK-NEXT: [[VAL0_I30:%.*]] = extractelement <32 x i1> [[VAL0]], i64 30 +; CHECK-NEXT: [[VAL0_I31:%.*]] = extractelement <32 x i1> [[VAL0]], i64 31 ; CHECK-NEXT: [[VAL1:%.*]] = load <32 x i1>, ptr [[SRC1]], align 4 -; CHECK-NEXT: [[VAL1_I0:%.*]] = extractelement <32 x i1> [[VAL1]], i32 0 +; CHECK-NEXT: [[VAL1_I0:%.*]] = extractelement <32 x i1> [[VAL1]], i64 0 ; CHECK-NEXT: [[AND_I0:%.*]] = and i1 [[VAL0_I0]], [[VAL1_I0]] -; CHECK-NEXT: [[VAL1_I1:%.*]] = extractelement <32 x i1> [[VAL1]], i32 1 +; CHECK-NEXT: [[VAL1_I1:%.*]] = extractelement <32 x i1> [[VAL1]], i64 1 ; CHECK-NEXT: [[AND_I1:%.*]] = and i1 [[VAL0_I1]], [[VAL1_I1]] -; CHECK-NEXT: [[VAL1_I2:%.*]] = extractelement <32 x i1> [[VAL1]], i32 2 +; CHECK-NEXT: [[VAL1_I2:%.*]] = extractelement <32 x i1> [[VAL1]], i64 2 ; CHECK-NEXT: [[AND_I2:%.*]] = and i1 [[VAL0_I2]], [[VAL1_I2]] -; CHECK-NEXT: [[VAL1_I3:%.*]] = extractelement <32 x i1> [[VAL1]], i32 3 +; CHECK-NEXT: [[VAL1_I3:%.*]] = extractelement <32 x i1> [[VAL1]], i64 3 ; CHECK-NEXT: [[AND_I3:%.*]] = and i1 [[VAL0_I3]], [[VAL1_I3]] -; CHECK-NEXT: [[VAL1_I4:%.*]] = extractelement <32 x i1> [[VAL1]], i32 4 +; CHECK-NEXT: [[VAL1_I4:%.*]] = extractelement <32 x i1> [[VAL1]], i64 4 ; CHECK-NEXT: [[AND_I4:%.*]] = and i1 [[VAL0_I4]], [[VAL1_I4]] -; CHECK-NEXT: [[VAL1_I5:%.*]] = extractelement <32 x i1> [[VAL1]], i32 5 +; CHECK-NEXT: [[VAL1_I5:%.*]] = extractelement <32 x i1> [[VAL1]], i64 5 ; CHECK-NEXT: [[AND_I5:%.*]] = and i1 [[VAL0_I5]], [[VAL1_I5]] -; CHECK-NEXT: [[VAL1_I6:%.*]] = extractelement <32 x i1> [[VAL1]], i32 6 +; CHECK-NEXT: [[VAL1_I6:%.*]] = extractelement <32 x i1> [[VAL1]], i64 6 ; CHECK-NEXT: [[AND_I6:%.*]] = and i1 [[VAL0_I6]], [[VAL1_I6]] -; CHECK-NEXT: [[VAL1_I7:%.*]] = extractelement <32 x i1> [[VAL1]], i32 7 +; CHECK-NEXT: [[VAL1_I7:%.*]] = extractelement <32 x i1> [[VAL1]], i64 7 ; CHECK-NEXT: [[AND_I7:%.*]] = and i1 [[VAL0_I7]], [[VAL1_I7]] -; CHECK-NEXT: [[VAL1_I8:%.*]] = extractelement <32 x i1> [[VAL1]], i32 8 +; CHECK-NEXT: [[VAL1_I8:%.*]] = extractelement <32 x i1> [[VAL1]], i64 8 ; CHECK-NEXT: [[AND_I8:%.*]] = and i1 [[VAL0_I8]], [[VAL1_I8]] -; CHECK-NEXT: [[VAL1_I9:%.*]] = extractelement <32 x i1> [[VAL1]], i32 9 +; CHECK-NEXT: [[VAL1_I9:%.*]] = extractelement <32 x i1> [[VAL1]], i64 9 ; CHECK-NEXT: [[AND_I9:%.*]] = and i1 [[VAL0_I9]], [[VAL1_I9]] -; CHECK-NEXT: [[VAL1_I10:%.*]] = extractelement <32 x i1> [[VAL1]], i32 10 +; CHECK-NEXT: [[VAL1_I10:%.*]] = extractelement <32 x i1> [[VAL1]], i64 10 ; CHECK-NEXT: [[AND_I10:%.*]] = and i1 [[VAL0_I10]], [[VAL1_I10]] -; CHECK-NEXT: [[VAL1_I11:%.*]] = extractelement <32 x i1> [[VAL1]], i32 11 +; CHECK-NEXT: [[VAL1_I11:%.*]] = extractelement <32 x i1> [[VAL1]], i64 11 ; CHECK-NEXT: [[AND_I11:%.*]] = and i1 [[VAL0_I11]], [[VAL1_I11]] -; CHECK-NEXT: [[VAL1_I12:%.*]] = extractelement <32 x i1> [[VAL1]], i32 12 +; CHECK-NEXT: [[VAL1_I12:%.*]] = extractelement <32 x i1> [[VAL1]], i64 12 ; CHECK-NEXT: [[AND_I12:%.*]] = and i1 [[VAL0_I12]], [[VAL1_I12]] -; CHECK-NEXT: [[VAL1_I13:%.*]] = extractelement <32 x i1> [[VAL1]], i32 13 +; CHECK-NEXT: [[VAL1_I13:%.*]] = extractelement <32 x i1> [[VAL1]], i64 13 ; CHECK-NEXT: [[AND_I13:%.*]] = and i1 [[VAL0_I13]], [[VAL1_I13]] -; CHECK-NEXT: [[VAL1_I14:%.*]] = extractelement <32 x i1> [[VAL1]], i32 14 +; CHECK-NEXT: [[VAL1_I14:%.*]] = extractelement <32 x i1> [[VAL1]], i64 14 ; CHECK-NEXT: [[AND_I14:%.*]] = and i1 [[VAL0_I14]], [[VAL1_I14]] -; CHECK-NEXT: [[VAL1_I15:%.*]] = extractelement <32 x i1> [[VAL1]], i32 15 +; CHECK-NEXT: [[VAL1_I15:%.*]] = extractelement <32 x i1> [[VAL1]], i64 15 ; CHECK-NEXT: [[AND_I15:%.*]] = and i1 [[VAL0_I15]], [[VAL1_I15]] -; CHECK-NEXT: [[VAL1_I16:%.*]] = extractelement <32 x i1> [[VAL1]], i32 16 +; CHECK-NEXT: [[VAL1_I16:%.*]] = extractelement <32 x i1> [[VAL1]], i64 16 ; CHECK-NEXT: [[AND_I16:%.*]] = and i1 [[VAL0_I16]], [[VAL1_I16]] -; CHECK-NEXT: [[VAL1_I17:%.*]] = extractelement <32 x i1> [[VAL1]], i32 17 +; CHECK-NEXT: [[VAL1_I17:%.*]] = extractelement <32 x i1> [[VAL1]], i64 17 ; CHECK-NEXT: [[AND_I17:%.*]] = and i1 [[VAL0_I17]], [[VAL1_I17]] -; CHECK-NEXT: [[VAL1_I18:%.*]] = extractelement <32 x i1> [[VAL1]], i32 18 +; CHECK-NEXT: [[VAL1_I18:%.*]] = extractelement <32 x i1> [[VAL1]], i64 18 ; CHECK-NEXT: [[AND_I18:%.*]] = and i1 [[VAL0_I18]], [[VAL1_I18]] -; CHECK-NEXT: [[VAL1_I19:%.*]] = extractelement <32 x i1> [[VAL1]], i32 19 +; CHECK-NEXT: [[VAL1_I19:%.*]] = extractelement <32 x i1> [[VAL1]], i64 19 ; CHECK-NEXT: [[AND_I19:%.*]] = and i1 [[VAL0_I19]], [[VAL1_I19]] -; CHECK-NEXT: [[VAL1_I20:%.*]] = extractelement <32 x i1> [[VAL1]], i32 20 +; CHECK-NEXT: [[VAL1_I20:%.*]] = extractelement <32 x i1> [[VAL1]], i64 20 ; CHECK-NEXT: [[AND_I20:%.*]] = and i1 [[VAL0_I20]], [[VAL1_I20]] -; CHECK-NEXT: [[VAL1_I21:%.*]] = extractelement <32 x i1> [[VAL1]], i32 21 +; CHECK-NEXT: [[VAL1_I21:%.*]] = extractelement <32 x i1> [[VAL1]], i64 21 ; CHECK-NEXT: [[AND_I21:%.*]] = and i1 [[VAL0_I21]], [[VAL1_I21]] -; CHECK-NEXT: [[VAL1_I22:%.*]] = extractelement <32 x i1> [[VAL1]], i32 22 +; CHECK-NEXT: [[VAL1_I22:%.*]] = extractelement <32 x i1> [[VAL1]], i64 22 ; CHECK-NEXT: [[AND_I22:%.*]] = and i1 [[VAL0_I22]], [[VAL1_I22]] -; CHECK-NEXT: [[VAL1_I23:%.*]] = extractelement <32 x i1> [[VAL1]], i32 23 +; CHECK-NEXT: [[VAL1_I23:%.*]] = extractelement <32 x i1> [[VAL1]], i64 23 ; CHECK-NEXT: [[AND_I23:%.*]] = and i1 [[VAL0_I23]], [[VAL1_I23]] -; CHECK-NEXT: [[VAL1_I24:%.*]] = extractelement <32 x i1> [[VAL1]], i32 24 +; CHECK-NEXT: [[VAL1_I24:%.*]] = extractelement <32 x i1> [[VAL1]], i64 24 ; CHECK-NEXT: [[AND_I24:%.*]] = and i1 [[VAL0_I24]], [[VAL1_I24]] -; CHECK-NEXT: [[VAL1_I25:%.*]] = extractelement <32 x i1> [[VAL1]], i32 25 +; CHECK-NEXT: [[VAL1_I25:%.*]] = extractelement <32 x i1> [[VAL1]], i64 25 ; CHECK-NEXT: [[AND_I25:%.*]] = and i1 [[VAL0_I25]], [[VAL1_I25]] -; CHECK-NEXT: [[VAL1_I26:%.*]] = extractelement <32 x i1> [[VAL1]], i32 26 +; CHECK-NEXT: [[VAL1_I26:%.*]] = extractelement <32 x i1> [[VAL1]], i64 26 ; CHECK-NEXT: [[AND_I26:%.*]] = and i1 [[VAL0_I26]], [[VAL1_I26]] -; CHECK-NEXT: [[VAL1_I27:%.*]] = extractelement <32 x i1> [[VAL1]], i32 27 +; CHECK-NEXT: [[VAL1_I27:%.*]] = extractelement <32 x i1> [[VAL1]], i64 27 ; CHECK-NEXT: [[AND_I27:%.*]] = and i1 [[VAL0_I27]], [[VAL1_I27]] -; CHECK-NEXT: [[VAL1_I28:%.*]] = extractelement <32 x i1> [[VAL1]], i32 28 +; CHECK-NEXT: [[VAL1_I28:%.*]] = extractelement <32 x i1> [[VAL1]], i64 28 ; CHECK-NEXT: [[AND_I28:%.*]] = and i1 [[VAL0_I28]], [[VAL1_I28]] -; CHECK-NEXT: [[VAL1_I29:%.*]] = extractelement <32 x i1> [[VAL1]], i32 29 +; CHECK-NEXT: [[VAL1_I29:%.*]] = extractelement <32 x i1> [[VAL1]], i64 29 ; CHECK-NEXT: [[AND_I29:%.*]] = and i1 [[VAL0_I29]], [[VAL1_I29]] -; CHECK-NEXT: [[VAL1_I30:%.*]] = extractelement <32 x i1> [[VAL1]], i32 30 +; CHECK-NEXT: [[VAL1_I30:%.*]] = extractelement <32 x i1> [[VAL1]], i64 30 ; CHECK-NEXT: [[AND_I30:%.*]] = and i1 [[VAL0_I30]], [[VAL1_I30]] -; CHECK-NEXT: [[VAL1_I31:%.*]] = extractelement <32 x i1> [[VAL1]], i32 31 +; CHECK-NEXT: [[VAL1_I31:%.*]] = extractelement <32 x i1> [[VAL1]], i64 31 ; CHECK-NEXT: [[AND_I31:%.*]] = and i1 [[VAL0_I31]], [[VAL1_I31]] -; CHECK-NEXT: [[AND_UPTO0:%.*]] = insertelement <32 x i1> poison, i1 [[AND_I0]], i32 0 -; CHECK-NEXT: [[AND_UPTO1:%.*]] = insertelement <32 x i1> [[AND_UPTO0]], i1 [[AND_I1]], i32 1 -; CHECK-NEXT: [[AND_UPTO2:%.*]] = insertelement <32 x i1> [[AND_UPTO1]], i1 [[AND_I2]], i32 2 -; CHECK-NEXT: [[AND_UPTO3:%.*]] = insertelement <32 x i1> [[AND_UPTO2]], i1 [[AND_I3]], i32 3 -; CHECK-NEXT: [[AND_UPTO4:%.*]] = insertelement <32 x i1> [[AND_UPTO3]], i1 [[AND_I4]], i32 4 -; CHECK-NEXT: [[AND_UPTO5:%.*]] = insertelement <32 x i1> [[AND_UPTO4]], i1 [[AND_I5]], i32 5 -; CHECK-NEXT: [[AND_UPTO6:%.*]] = insertelement <32 x i1> [[AND_UPTO5]], i1 [[AND_I6]], i32 6 -; CHECK-NEXT: [[AND_UPTO7:%.*]] = insertelement <32 x i1> [[AND_UPTO6]], i1 [[AND_I7]], i32 7 -; CHECK-NEXT: [[AND_UPTO8:%.*]] = insertelement <32 x i1> [[AND_UPTO7]], i1 [[AND_I8]], i32 8 -; CHECK-NEXT: [[AND_UPTO9:%.*]] = insertelement <32 x i1> [[AND_UPTO8]], i1 [[AND_I9]], i32 9 -; CHECK-NEXT: [[AND_UPTO10:%.*]] = insertelement <32 x i1> [[AND_UPTO9]], i1 [[AND_I10]], i32 10 -; CHECK-NEXT: [[AND_UPTO11:%.*]] = insertelement <32 x i1> [[AND_UPTO10]], i1 [[AND_I11]], i32 11 -; CHECK-NEXT: [[AND_UPTO12:%.*]] = insertelement <32 x i1> [[AND_UPTO11]], i1 [[AND_I12]], i32 12 -; CHECK-NEXT: [[AND_UPTO13:%.*]] = insertelement <32 x i1> [[AND_UPTO12]], i1 [[AND_I13]], i32 13 -; CHECK-NEXT: [[AND_UPTO14:%.*]] = insertelement <32 x i1> [[AND_UPTO13]], i1 [[AND_I14]], i32 14 -; CHECK-NEXT: [[AND_UPTO15:%.*]] = insertelement <32 x i1> [[AND_UPTO14]], i1 [[AND_I15]], i32 15 -; CHECK-NEXT: [[AND_UPTO16:%.*]] = insertelement <32 x i1> [[AND_UPTO15]], i1 [[AND_I16]], i32 16 -; CHECK-NEXT: [[AND_UPTO17:%.*]] = insertelement <32 x i1> [[AND_UPTO16]], i1 [[AND_I17]], i32 17 -; CHECK-NEXT: [[AND_UPTO18:%.*]] = insertelement <32 x i1> [[AND_UPTO17]], i1 [[AND_I18]], i32 18 -; CHECK-NEXT: [[AND_UPTO19:%.*]] = insertelement <32 x i1> [[AND_UPTO18]], i1 [[AND_I19]], i32 19 -; CHECK-NEXT: [[AND_UPTO20:%.*]] = insertelement <32 x i1> [[AND_UPTO19]], i1 [[AND_I20]], i32 20 -; CHECK-NEXT: [[AND_UPTO21:%.*]] = insertelement <32 x i1> [[AND_UPTO20]], i1 [[AND_I21]], i32 21 -; CHECK-NEXT: [[AND_UPTO22:%.*]] = insertelement <32 x i1> [[AND_UPTO21]], i1 [[AND_I22]], i32 22 -; CHECK-NEXT: [[AND_UPTO23:%.*]] = insertelement <32 x i1> [[AND_UPTO22]], i1 [[AND_I23]], i32 23 -; CHECK-NEXT: [[AND_UPTO24:%.*]] = insertelement <32 x i1> [[AND_UPTO23]], i1 [[AND_I24]], i32 24 -; CHECK-NEXT: [[AND_UPTO25:%.*]] = insertelement <32 x i1> [[AND_UPTO24]], i1 [[AND_I25]], i32 25 -; CHECK-NEXT: [[AND_UPTO26:%.*]] = insertelement <32 x i1> [[AND_UPTO25]], i1 [[AND_I26]], i32 26 -; CHECK-NEXT: [[AND_UPTO27:%.*]] = insertelement <32 x i1> [[AND_UPTO26]], i1 [[AND_I27]], i32 27 -; CHECK-NEXT: [[AND_UPTO28:%.*]] = insertelement <32 x i1> [[AND_UPTO27]], i1 [[AND_I28]], i32 28 -; CHECK-NEXT: [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i32 29 -; CHECK-NEXT: [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i32 30 -; CHECK-NEXT: [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i32 31 +; CHECK-NEXT: [[AND_UPTO0:%.*]] = insertelement <32 x i1> poison, i1 [[AND_I0]], i64 0 +; CHECK-NEXT: [[AND_UPTO1:%.*]] = insertelement <32 x i1> [[AND_UPTO0]], i1 [[AND_I1]], i64 1 +; CHECK-NEXT: [[AND_UPTO2:%.*]] = insertelement <32 x i1> [[AND_UPTO1]], i1 [[AND_I2]], i64 2 +; CHECK-NEXT: [[AND_UPTO3:%.*]] = insertelement <32 x i1> [[AND_UPTO2]], i1 [[AND_I3]], i64 3 +; CHECK-NEXT: [[AND_UPTO4:%.*]] = insertelement <32 x i1> [[AND_UPTO3]], i1 [[AND_I4]], i64 4 +; CHECK-NEXT: [[AND_UPTO5:%.*]] = insertelement <32 x i1> [[AND_UPTO4]], i1 [[AND_I5]], i64 5 +; CHECK-NEXT: [[AND_UPTO6:%.*]] = insertelement <32 x i1> [[AND_UPTO5]], i1 [[AND_I6]], i64 6 +; CHECK-NEXT: [[AND_UPTO7:%.*]] = insertelement <32 x i1> [[AND_UPTO6]], i1 [[AND_I7]], i64 7 +; CHECK-NEXT: [[AND_UPTO8:%.*]] = insertelement <32 x i1> [[AND_UPTO7]], i1 [[AND_I8]], i64 8 +; CHECK-NEXT: [[AND_UPTO9:%.*]] = insertelement <32 x i1> [[AND_UPTO8]], i1 [[AND_I9]], i64 9 +; CHECK-NEXT: [[AND_UPTO10:%.*]] = insertelement <32 x i1> [[AND_UPTO9]], i1 [[AND_I10]], i64 10 +; CHECK-NEXT: [[AND_UPTO11:%.*]] = insertelement <32 x i1> [[AND_UPTO10]], i1 [[AND_I11]], i64 11 +; CHECK-NEXT: [[AND_UPTO12:%.*]] = insertelement <32 x i1> [[AND_UPTO11]], i1 [[AND_I12]], i64 12 +; CHECK-NEXT: [[AND_UPTO13:%.*]] = insertelement <32 x i1> [[AND_UPTO12]], i1 [[AND_I13]], i64 13 +; CHECK-NEXT: [[AND_UPTO14:%.*]] = insertelement <32 x i1> [[AND_UPTO13]], i1 [[AND_I14]], i64 14 +; CHECK-NEXT: [[AND_UPTO15:%.*]] = insertelement <32 x i1> [[AND_UPTO14]], i1 [[AND_I15]], i64 15 +; CHECK-NEXT: [[AND_UPTO16:%.*]] = insertelement <32 x i1> [[AND_UPTO15]], i1 [[AND_I16]], i64 16 +; CHECK-NEXT: [[AND_UPTO17:%.*]] = insertelement <32 x i1> [[AND_UPTO16]], i1 [[AND_I17]], i64 17 +; CHECK-NEXT: [[AND_UPTO18:%.*]] = insertelement <32 x i1> [[AND_UPTO17]], i1 [[AND_I18]], i64 18 +; CHECK-NEXT: [[AND_UPTO19:%.*]] = insertelement <32 x i1> [[AND_UPTO18]], i1 [[AND_I19]], i64 19 +; CHECK-NEXT: [[AND_UPTO20:%.*]] = insertelement <32 x i1> [[AND_UPTO19]], i1 [[AND_I20]], i64 20 +; CHECK-NEXT: [[AND_UPTO21:%.*]] = insertelement <32 x i1> [[AND_UPTO20]], i1 [[AND_I21]], i64 21 +; CHECK-NEXT: [[AND_UPTO22:%.*]] = insertelement <32 x i1> [[AND_UPTO21]], i1 [[AND_I22]], i64 22 +; CHECK-NEXT: [[AND_UPTO23:%.*]] = insertelement <32 x i1> [[AND_UPTO22]], i1 [[AND_I23]], i64 23 +; CHECK-NEXT: [[AND_UPTO24:%.*]] = insertelement <32 x i1> [[AND_UPTO23]], i1 [[AND_I24]], i64 24 +; CHECK-NEXT: [[AND_UPTO25:%.*]] = insertelement <32 x i1> [[AND_UPTO24]], i1 [[AND_I25]], i64 25 +; CHECK-NEXT: [[AND_UPTO26:%.*]] = insertelement <32 x i1> [[AND_UPTO25]], i1 [[AND_I26]], i64 26 +; CHECK-NEXT: [[AND_UPTO27:%.*]] = insertelement <32 x i1> [[AND_UPTO26]], i1 [[AND_I27]], i64 27 +; CHECK-NEXT: [[AND_UPTO28:%.*]] = insertelement <32 x i1> [[AND_UPTO27]], i1 [[AND_I28]], i64 28 +; CHECK-NEXT: [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i64 29 +; CHECK-NEXT: [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i64 30 +; CHECK-NEXT: [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i64 31 ; CHECK-NEXT: store <32 x i1> [[AND]], ptr [[DEST:%.*]], align 4 ; CHECK-NEXT: ret void ; @@ -559,17 +559,17 @@ ; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3 -; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i32 0 -; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x ptr> [[PTR:%.*]], i32 0 +; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x ptr> [[PTR:%.*]], i64 0 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i64 0 ; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I0]], i32 0, i32 [[I_I0]] -; CHECK-NEXT: [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i32 1 -; CHECK-NEXT: [[PTR_I1:%.*]] = extractelement <4 x ptr> [[PTR]], i32 1 +; CHECK-NEXT: [[PTR_I1:%.*]] = extractelement <4 x ptr> [[PTR]], i64 1 +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i64 1 ; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I1]], i32 1, i32 [[I_I1]] -; CHECK-NEXT: [[I_I2:%.*]] = extractelement <4 x i32> [[I]], i32 2 -; CHECK-NEXT: [[PTR_I2:%.*]] = extractelement <4 x ptr> [[PTR]], i32 2 +; CHECK-NEXT: [[PTR_I2:%.*]] = extractelement <4 x ptr> [[PTR]], i64 2 +; CHECK-NEXT: [[I_I2:%.*]] = extractelement <4 x i32> [[I]], i64 2 ; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I2]], i32 2, i32 [[I_I2]] -; CHECK-NEXT: [[I_I3:%.*]] = extractelement <4 x i32> [[I]], i32 3 -; CHECK-NEXT: [[PTR_I3:%.*]] = extractelement <4 x ptr> [[PTR]], i32 3 +; CHECK-NEXT: [[PTR_I3:%.*]] = extractelement <4 x ptr> [[PTR]], i64 3 +; CHECK-NEXT: [[I_I3:%.*]] = extractelement <4 x i32> [[I]], i64 3 ; CHECK-NEXT: [[VAL_I3:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I3]], i32 3, i32 [[I_I3]] ; CHECK-NEXT: store ptr [[VAL_I0]], ptr [[DEST]], align 32 ; CHECK-NEXT: store ptr [[VAL_I1]], ptr [[DEST_I1]], align 8 @@ -589,10 +589,10 @@ define <4 x float> @f14(<4 x float> %acc, i32 %count) { ; CHECK-LABEL: @f14( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i32 0 -; CHECK-NEXT: [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i32 1 -; CHECK-NEXT: [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i32 2 -; CHECK-NEXT: [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i32 3 +; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i64 0 +; CHECK-NEXT: [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i64 1 +; CHECK-NEXT: [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i64 2 +; CHECK-NEXT: [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i64 3 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], [[ENTRY:%.*]] ], [ [[NEXT_ACC_I0:%.*]], [[LOOP]] ] @@ -600,23 +600,23 @@ ; CHECK-NEXT: [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], [[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], [[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[NEXT_COUNT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i32 0 -; CHECK-NEXT: [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float [[THIS_ACC_I1]], i32 1 -; CHECK-NEXT: [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i32 2 -; CHECK-NEXT: [[THIS_ACC:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO2]], float [[THIS_ACC_I3]], i32 3 +; CHECK-NEXT: [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i64 0 +; CHECK-NEXT: [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float [[THIS_ACC_I1]], i64 1 +; CHECK-NEXT: [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i64 2 +; CHECK-NEXT: [[THIS_ACC:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO2]], float [[THIS_ACC_I3]], i64 3 ; CHECK-NEXT: [[FOO:%.*]] = call <4 x float> @ext(<4 x float> [[THIS_ACC]]) -; CHECK-NEXT: [[FOO_I0:%.*]] = extractelement <4 x float> [[FOO]], i32 0 +; CHECK-NEXT: [[FOO_I0:%.*]] = extractelement <4 x float> [[FOO]], i64 0 ; CHECK-NEXT: [[NEXT_ACC_I0]] = fadd float [[THIS_ACC_I0]], [[FOO_I0]] -; CHECK-NEXT: [[FOO_I1:%.*]] = extractelement <4 x float> [[FOO]], i32 1 +; CHECK-NEXT: [[FOO_I1:%.*]] = extractelement <4 x float> [[FOO]], i64 1 ; CHECK-NEXT: [[NEXT_ACC_I1]] = fadd float [[THIS_ACC_I1]], [[FOO_I1]] -; CHECK-NEXT: [[FOO_I2:%.*]] = extractelement <4 x float> [[FOO]], i32 2 +; CHECK-NEXT: [[FOO_I2:%.*]] = extractelement <4 x float> [[FOO]], i64 2 ; CHECK-NEXT: [[NEXT_ACC_I2]] = fadd float [[THIS_ACC_I2]], [[FOO_I2]] -; CHECK-NEXT: [[FOO_I3:%.*]] = extractelement <4 x float> [[FOO]], i32 3 +; CHECK-NEXT: [[FOO_I3:%.*]] = extractelement <4 x float> [[FOO]], i64 3 ; CHECK-NEXT: [[NEXT_ACC_I3]] = fadd float [[THIS_ACC_I3]], [[FOO_I3]] -; CHECK-NEXT: [[NEXT_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEXT_ACC_I0]], i32 0 -; CHECK-NEXT: [[NEXT_ACC_UPTO1:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO0]], float [[NEXT_ACC_I1]], i32 1 -; CHECK-NEXT: [[NEXT_ACC_UPTO2:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO1]], float [[NEXT_ACC_I2]], i32 2 -; CHECK-NEXT: [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i32 3 +; CHECK-NEXT: [[NEXT_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEXT_ACC_I0]], i64 0 +; CHECK-NEXT: [[NEXT_ACC_UPTO1:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO0]], float [[NEXT_ACC_I1]], i64 1 +; CHECK-NEXT: [[NEXT_ACC_UPTO2:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO1]], float [[NEXT_ACC_I2]], i64 2 +; CHECK-NEXT: [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i64 3 ; CHECK-NEXT: [[NEXT_COUNT]] = sub i32 [[THIS_COUNT]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NEXT_COUNT]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] @@ -659,18 +659,18 @@ ; CHECK-NEXT: [[NEG_I1:%.*]] = fneg float [[VAL_I1]] ; CHECK-NEXT: [[NEG_I2:%.*]] = fneg float [[VAL_I2]] ; CHECK-NEXT: [[NEG_I3:%.*]] = fneg float [[VAL_I3]] -; CHECK-NEXT: [[NEG_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEG_I0]], i32 0 -; CHECK-NEXT: [[NEG_UPTO1:%.*]] = insertelement <4 x float> [[NEG_UPTO0]], float [[NEG_I1]], i32 1 -; CHECK-NEXT: [[NEG_UPTO2:%.*]] = insertelement <4 x float> [[NEG_UPTO1]], float [[NEG_I2]], i32 2 -; CHECK-NEXT: [[NEG:%.*]] = insertelement <4 x float> [[NEG_UPTO2]], float [[NEG_I3]], i32 3 +; CHECK-NEXT: [[NEG_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEG_I0]], i64 0 +; CHECK-NEXT: [[NEG_UPTO1:%.*]] = insertelement <4 x float> [[NEG_UPTO0]], float [[NEG_I1]], i64 1 +; CHECK-NEXT: [[NEG_UPTO2:%.*]] = insertelement <4 x float> [[NEG_UPTO1]], float [[NEG_I2]], i64 2 +; CHECK-NEXT: [[NEG:%.*]] = insertelement <4 x float> [[NEG_UPTO2]], float [[NEG_I3]], i64 3 ; CHECK-NEXT: [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[NEG]]) -; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0 +; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i64 0 ; CHECK-NEXT: [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00 -; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1 +; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i64 1 ; CHECK-NEXT: [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00 -; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2 +; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i64 2 ; CHECK-NEXT: [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00 -; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3 +; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i64 3 ; CHECK-NEXT: [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00 ; CHECK-NEXT: [[SEL_I0:%.*]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00 ; CHECK-NEXT: [[SEL_I1:%.*]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00 @@ -713,14 +713,14 @@ ; Check that IR flags are preserved. define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) { ; CHECK-LABEL: @f16( -; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0 -; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]] -; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1 -; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1 +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1 +; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = add nuw nsw i32 [[I_I1]], [[J_I1]] -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[RES]] ; %res = add nuw nsw <2 x i32> %i, %j @@ -728,14 +728,14 @@ } define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) { ; CHECK-LABEL: @f17( -; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0 -; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = sdiv exact i32 [[I_I0]], [[J_I0]] -; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1 -; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1 +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1 +; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = sdiv exact i32 [[I_I1]], [[J_I1]] -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[RES]] ; %res = sdiv exact <2 x i32> %i, %j @@ -743,14 +743,14 @@ } define <2 x float> @f18(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @f18( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 -; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = fadd fast float [[X_I0]], [[Y_I0]] -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 -; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 +; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = fadd fast float [[X_I1]], [[Y_I1]] -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1 ; CHECK-NEXT: ret <2 x float> [[RES]] ; %res = fadd fast <2 x float> %x, %y @@ -758,12 +758,12 @@ } define <2 x float> @f19(<2 x float> %x) { ; CHECK-LABEL: @f19( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = fneg fast float [[X_I0]] -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = fneg fast float [[X_I1]] -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1 ; CHECK-NEXT: ret <2 x float> [[RES]] ; %res = fneg fast <2 x float> %x @@ -771,14 +771,14 @@ } define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @f20( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 -; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = fcmp fast ogt float [[X_I0]], [[Y_I0]] -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 -; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 +; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = fcmp fast ogt float [[X_I1]], [[Y_I1]] -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i1> [[RES_UPTO0]], i1 [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i1> [[RES_UPTO0]], i1 [[RES_I1]], i64 1 ; CHECK-NEXT: ret <2 x i1> [[RES]] ; %res = fcmp fast ogt <2 x float> %x, %y @@ -787,12 +787,12 @@ declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) define <2 x float> @f21(<2 x float> %x) { ; CHECK-LABEL: @f21( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I0]]) -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I1]]) -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1 ; CHECK-NEXT: ret <2 x float> [[RES]] ; %res = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %x) @@ -801,16 +801,16 @@ declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) { ; CHECK-LABEL: @f22( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 -; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0 -; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 +; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]]) -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 -; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1 -; CHECK-NEXT: [[Z_I1:%.*]] = extractelement <2 x float> [[Z]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 +; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 +; CHECK-NEXT: [[Z_I1:%.*]] = extractelement <2 x float> [[Z]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.fma.f32(float [[X_I1]], float [[Y_I1]], float [[Z_I1]]) -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1 ; CHECK-NEXT: ret <2 x float> [[RES]] ; %res = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) @@ -820,9 +820,9 @@ ; See https://reviews.llvm.org/D83101#2133062 define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) { ; CHECK-LABEL: @f23_crash( -; CHECK-NEXT: [[SRCVEC_I0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i32 0 -; CHECK-NEXT: [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SRCVEC_I0]], i32 0 -; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i32 1 +; CHECK-NEXT: [[SRCVEC_I0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i64 0 +; CHECK-NEXT: [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SRCVEC_I0]], i64 0 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[T1]] ; %v0 = extractelement <2 x i32> %srcvec, i32 0 diff --git a/llvm/test/Transforms/Scalarizer/constant-insertelement.ll b/llvm/test/Transforms/Scalarizer/constant-insertelement.ll --- a/llvm/test/Transforms/Scalarizer/constant-insertelement.ll +++ b/llvm/test/Transforms/Scalarizer/constant-insertelement.ll @@ -15,10 +15,10 @@ ; ALL-NEXT: [[VAL2_I1:%.*]] = shl i32 2, [[VAL0_I1]] ; ALL-NEXT: [[VAL2_I2:%.*]] = shl i32 3, [[VAL0_I2]] ; ALL-NEXT: [[VAL2_I3:%.*]] = shl i32 4, [[REPL:%.*]] -; ALL-NEXT: [[VAL2_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[VAL2_I0]], i32 0 -; ALL-NEXT: [[VAL2_UPTO1:%.*]] = insertelement <4 x i32> [[VAL2_UPTO0]], i32 [[VAL2_I1]], i32 1 -; ALL-NEXT: [[VAL2_UPTO2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO1]], i32 [[VAL2_I2]], i32 2 -; ALL-NEXT: [[VAL2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO2]], i32 [[VAL2_I3]], i32 3 +; ALL-NEXT: [[VAL2_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[VAL2_I0]], i64 0 +; ALL-NEXT: [[VAL2_UPTO1:%.*]] = insertelement <4 x i32> [[VAL2_UPTO0]], i32 [[VAL2_I1]], i64 1 +; ALL-NEXT: [[VAL2_UPTO2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO1]], i32 [[VAL2_I2]], i64 2 +; ALL-NEXT: [[VAL2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO2]], i32 [[VAL2_I3]], i64 3 ; ALL-NEXT: ret <4 x i32> [[VAL2]] ; %val0 = load <4 x i32> , ptr %src diff --git a/llvm/test/Transforms/Scalarizer/intrinsics.ll b/llvm/test/Transforms/Scalarizer/intrinsics.ll --- a/llvm/test/Transforms/Scalarizer/intrinsics.ll +++ b/llvm/test/Transforms/Scalarizer/intrinsics.ll @@ -34,12 +34,12 @@ define <2 x float> @scalarize_sqrt_v2f32(<2 x float> %x) #0 { ; CHECK-LABEL: @scalarize_sqrt_v2f32( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 ; CHECK-NEXT: [[SQRT_I0:%.*]] = call float @llvm.sqrt.f32(float [[X_I0]]) -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[SQRT_I1:%.*]] = call float @llvm.sqrt.f32(float [[X_I1]]) -; CHECK-NEXT: [[SQRT_UPTO0:%.*]] = insertelement <2 x float> poison, float [[SQRT_I0]], i32 0 -; CHECK-NEXT: [[SQRT:%.*]] = insertelement <2 x float> [[SQRT_UPTO0]], float [[SQRT_I1]], i32 1 +; CHECK-NEXT: [[SQRT_UPTO0:%.*]] = insertelement <2 x float> poison, float [[SQRT_I0]], i64 0 +; CHECK-NEXT: [[SQRT:%.*]] = insertelement <2 x float> [[SQRT_UPTO0]], float [[SQRT_I1]], i64 1 ; CHECK-NEXT: ret <2 x float> [[SQRT]] ; %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x) @@ -48,14 +48,14 @@ define <2 x float> @scalarize_minnum_v2f32(<2 x float> %x, <2 x float> %y) #0 { ; CHECK-LABEL: @scalarize_minnum_v2f32( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 -; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 ; CHECK-NEXT: [[MINNUM_I0:%.*]] = call float @llvm.minnum.f32(float [[X_I0]], float [[Y_I0]]) -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 -; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 +; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 ; CHECK-NEXT: [[MINNUM_I1:%.*]] = call float @llvm.minnum.f32(float [[X_I1]], float [[Y_I1]]) -; CHECK-NEXT: [[MINNUM_UPTO0:%.*]] = insertelement <2 x float> poison, float [[MINNUM_I0]], i32 0 -; CHECK-NEXT: [[MINNUM:%.*]] = insertelement <2 x float> [[MINNUM_UPTO0]], float [[MINNUM_I1]], i32 1 +; CHECK-NEXT: [[MINNUM_UPTO0:%.*]] = insertelement <2 x float> poison, float [[MINNUM_I0]], i64 0 +; CHECK-NEXT: [[MINNUM:%.*]] = insertelement <2 x float> [[MINNUM_UPTO0]], float [[MINNUM_I1]], i64 1 ; CHECK-NEXT: ret <2 x float> [[MINNUM]] ; %minnum = call <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> %y) @@ -64,14 +64,14 @@ define <2 x float> @scalarize_minimum_v2f32(<2 x float> %x, <2 x float> %y) #0 { ; CHECK-LABEL: @scalarize_minimum_v2f32( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 -; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 ; CHECK-NEXT: [[MINIMUM_I0:%.*]] = call float @llvm.minimum.f32(float [[X_I0]], float [[Y_I0]]) -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 -; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 +; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 ; CHECK-NEXT: [[MINIMUM_I1:%.*]] = call float @llvm.minimum.f32(float [[X_I1]], float [[Y_I1]]) -; CHECK-NEXT: [[MINIMUM_UPTO0:%.*]] = insertelement <2 x float> poison, float [[MINIMUM_I0]], i32 0 -; CHECK-NEXT: [[MINIMUM:%.*]] = insertelement <2 x float> [[MINIMUM_UPTO0]], float [[MINIMUM_I1]], i32 1 +; CHECK-NEXT: [[MINIMUM_UPTO0:%.*]] = insertelement <2 x float> poison, float [[MINIMUM_I0]], i64 0 +; CHECK-NEXT: [[MINIMUM:%.*]] = insertelement <2 x float> [[MINIMUM_UPTO0]], float [[MINIMUM_I1]], i64 1 ; CHECK-NEXT: ret <2 x float> [[MINIMUM]] ; %minimum = call <2 x float> @llvm.minimum.v2f32(<2 x float> %x, <2 x float> %y) @@ -80,14 +80,14 @@ define <2 x float> @scalarize_maximum_v2f32(<2 x float> %x, <2 x float> %y) #0 { ; CHECK-LABEL: @scalarize_maximum_v2f32( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 -; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 ; CHECK-NEXT: [[MAXIMUM_I0:%.*]] = call float @llvm.maximum.f32(float [[X_I0]], float [[Y_I0]]) -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 -; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 +; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 ; CHECK-NEXT: [[MAXIMUM_I1:%.*]] = call float @llvm.maximum.f32(float [[X_I1]], float [[Y_I1]]) -; CHECK-NEXT: [[MAXIMUM_UPTO0:%.*]] = insertelement <2 x float> poison, float [[MAXIMUM_I0]], i32 0 -; CHECK-NEXT: [[MAXIMUM:%.*]] = insertelement <2 x float> [[MAXIMUM_UPTO0]], float [[MAXIMUM_I1]], i32 1 +; CHECK-NEXT: [[MAXIMUM_UPTO0:%.*]] = insertelement <2 x float> poison, float [[MAXIMUM_I0]], i64 0 +; CHECK-NEXT: [[MAXIMUM:%.*]] = insertelement <2 x float> [[MAXIMUM_UPTO0]], float [[MAXIMUM_I1]], i64 1 ; CHECK-NEXT: ret <2 x float> [[MAXIMUM]] ; %maximum = call <2 x float> @llvm.maximum.v2f32(<2 x float> %x, <2 x float> %y) @@ -96,16 +96,16 @@ define <2 x float> @scalarize_fma_v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 { ; CHECK-LABEL: @scalarize_fma_v2f32( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 -; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0 -; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 +; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 +; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i64 0 ; CHECK-NEXT: [[FMA_I0:%.*]] = call float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]]) -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 -; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1 -; CHECK-NEXT: [[Z_I1:%.*]] = extractelement <2 x float> [[Z]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 +; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 +; CHECK-NEXT: [[Z_I1:%.*]] = extractelement <2 x float> [[Z]], i64 1 ; CHECK-NEXT: [[FMA_I1:%.*]] = call float @llvm.fma.f32(float [[X_I1]], float [[Y_I1]], float [[Z_I1]]) -; CHECK-NEXT: [[FMA_UPTO0:%.*]] = insertelement <2 x float> poison, float [[FMA_I0]], i32 0 -; CHECK-NEXT: [[FMA:%.*]] = insertelement <2 x float> [[FMA_UPTO0]], float [[FMA_I1]], i32 1 +; CHECK-NEXT: [[FMA_UPTO0:%.*]] = insertelement <2 x float> poison, float [[FMA_I0]], i64 0 +; CHECK-NEXT: [[FMA:%.*]] = insertelement <2 x float> [[FMA_UPTO0]], float [[FMA_I1]], i64 1 ; CHECK-NEXT: ret <2 x float> [[FMA]] ; %fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) @@ -114,12 +114,12 @@ define <2 x i32> @scalarize_bswap_v2i32(<2 x i32> %x) #0 { ; CHECK-LABEL: @scalarize_bswap_v2i32( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x i32> [[X:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[BSWAP_I0:%.*]] = call i32 @llvm.bswap.i32(i32 [[X_I0]]) -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x i32> [[X]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x i32> [[X]], i64 1 ; CHECK-NEXT: [[BSWAP_I1:%.*]] = call i32 @llvm.bswap.i32(i32 [[X_I1]]) -; CHECK-NEXT: [[BSWAP_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[BSWAP_I0]], i32 0 -; CHECK-NEXT: [[BSWAP:%.*]] = insertelement <2 x i32> [[BSWAP_UPTO0]], i32 [[BSWAP_I1]], i32 1 +; CHECK-NEXT: [[BSWAP_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[BSWAP_I0]], i64 0 +; CHECK-NEXT: [[BSWAP:%.*]] = insertelement <2 x i32> [[BSWAP_UPTO0]], i32 [[BSWAP_I1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[BSWAP]] ; %bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %x) @@ -128,12 +128,12 @@ define <2 x i32> @scalarize_ctlz_v2i32(<2 x i32> %x) #0 { ; CHECK-LABEL: @scalarize_ctlz_v2i32( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x i32> [[X:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[CTLZ_I0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X_I0]], i1 true) -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x i32> [[X]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x i32> [[X]], i64 1 ; CHECK-NEXT: [[CTLZ_I1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X_I1]], i1 true) -; CHECK-NEXT: [[CTLZ_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[CTLZ_I0]], i32 0 -; CHECK-NEXT: [[CTLZ:%.*]] = insertelement <2 x i32> [[CTLZ_UPTO0]], i32 [[CTLZ_I1]], i32 1 +; CHECK-NEXT: [[CTLZ_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[CTLZ_I0]], i64 0 +; CHECK-NEXT: [[CTLZ:%.*]] = insertelement <2 x i32> [[CTLZ_UPTO0]], i32 [[CTLZ_I1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[CTLZ]] ; %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 true) @@ -142,12 +142,12 @@ define <2 x float> @scalarize_powi_v2f32(<2 x float> %x, i32 %y) #0 { ; CHECK-LABEL: @scalarize_powi_v2f32( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 ; CHECK-NEXT: [[POWI_I0:%.*]] = call float @llvm.powi.f32.i32(float [[X_I0]], i32 [[Y:%.*]]) -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[POWI_I1:%.*]] = call float @llvm.powi.f32.i32(float [[X_I1]], i32 [[Y]]) -; CHECK-NEXT: [[POWI_UPTO0:%.*]] = insertelement <2 x float> poison, float [[POWI_I0]], i32 0 -; CHECK-NEXT: [[POWI:%.*]] = insertelement <2 x float> [[POWI_UPTO0]], float [[POWI_I1]], i32 1 +; CHECK-NEXT: [[POWI_UPTO0:%.*]] = insertelement <2 x float> poison, float [[POWI_I0]], i64 0 +; CHECK-NEXT: [[POWI:%.*]] = insertelement <2 x float> [[POWI_UPTO0]], float [[POWI_I1]], i64 1 ; CHECK-NEXT: ret <2 x float> [[POWI]] ; %powi = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> %x, i32 %y) @@ -156,12 +156,12 @@ define <2 x i32> @scalarize_smul_fix_sat_v2i32(<2 x i32> %x) #0 { ; CHECK-LABEL: @scalarize_smul_fix_sat_v2i32( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x i32> [[X:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[SMULFIXSAT_I0:%.*]] = call i32 @llvm.smul.fix.sat.i32(i32 [[X_I0]], i32 5, i32 31) -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x i32> [[X]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x i32> [[X]], i64 1 ; CHECK-NEXT: [[SMULFIXSAT_I1:%.*]] = call i32 @llvm.smul.fix.sat.i32(i32 [[X_I1]], i32 19, i32 31) -; CHECK-NEXT: [[SMULFIXSAT_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SMULFIXSAT_I0]], i32 0 -; CHECK-NEXT: [[SMULFIXSAT:%.*]] = insertelement <2 x i32> [[SMULFIXSAT_UPTO0]], i32 [[SMULFIXSAT_I1]], i32 1 +; CHECK-NEXT: [[SMULFIXSAT_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SMULFIXSAT_I0]], i64 0 +; CHECK-NEXT: [[SMULFIXSAT:%.*]] = insertelement <2 x i32> [[SMULFIXSAT_UPTO0]], i32 [[SMULFIXSAT_I1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[SMULFIXSAT]] ; %smulfixsat = call <2 x i32> @llvm.smul.fix.sat.v2i32(<2 x i32> %x, <2 x i32> , i32 31) @@ -170,12 +170,12 @@ define <2 x i32> @scalarize_umul_fix_sat_v2i32(<2 x i32> %x) #0 { ; CHECK-LABEL: @scalarize_umul_fix_sat_v2i32( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x i32> [[X:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[UMULFIXSAT_I0:%.*]] = call i32 @llvm.umul.fix.sat.i32(i32 [[X_I0]], i32 5, i32 31) -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x i32> [[X]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x i32> [[X]], i64 1 ; CHECK-NEXT: [[UMULFIXSAT_I1:%.*]] = call i32 @llvm.umul.fix.sat.i32(i32 [[X_I1]], i32 19, i32 31) -; CHECK-NEXT: [[UMULFIXSAT_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[UMULFIXSAT_I0]], i32 0 -; CHECK-NEXT: [[UMULFIXSAT:%.*]] = insertelement <2 x i32> [[UMULFIXSAT_UPTO0]], i32 [[UMULFIXSAT_I1]], i32 1 +; CHECK-NEXT: [[UMULFIXSAT_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[UMULFIXSAT_I0]], i64 0 +; CHECK-NEXT: [[UMULFIXSAT:%.*]] = insertelement <2 x i32> [[UMULFIXSAT_UPTO0]], i32 [[UMULFIXSAT_I1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[UMULFIXSAT]] ; %umulfixsat = call <2 x i32> @llvm.umul.fix.sat.v2i32(<2 x i32> %x, <2 x i32> , i32 31) @@ -184,12 +184,12 @@ define <2 x i32> @scalarize_fptosi_sat(<2 x float> %x) #0 { ; CHECK-LABEL: @scalarize_fptosi_sat( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 ; CHECK-NEXT: [[SAT_I0:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[X_I0]]) -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[SAT_I1:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[X_I1]]) -; CHECK-NEXT: [[SAT_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SAT_I0]], i32 0 -; CHECK-NEXT: [[SAT:%.*]] = insertelement <2 x i32> [[SAT_UPTO0]], i32 [[SAT_I1]], i32 1 +; CHECK-NEXT: [[SAT_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SAT_I0]], i64 0 +; CHECK-NEXT: [[SAT:%.*]] = insertelement <2 x i32> [[SAT_UPTO0]], i32 [[SAT_I1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[SAT]] ; %sat = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %x) @@ -198,12 +198,12 @@ define <2 x i32> @scalarize_fptoui_sat(<2 x float> %x) #0 { ; CHECK-LABEL: @scalarize_fptoui_sat( -; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 ; CHECK-NEXT: [[SAT_I0:%.*]] = call i32 @llvm.fptoui.sat.i32.f32(float [[X_I0]]) -; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1 +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 ; CHECK-NEXT: [[SAT_I1:%.*]] = call i32 @llvm.fptoui.sat.i32.f32(float [[X_I1]]) -; CHECK-NEXT: [[SAT_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SAT_I0]], i32 0 -; CHECK-NEXT: [[SAT:%.*]] = insertelement <2 x i32> [[SAT_UPTO0]], i32 [[SAT_I1]], i32 1 +; CHECK-NEXT: [[SAT_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SAT_I0]], i64 0 +; CHECK-NEXT: [[SAT:%.*]] = insertelement <2 x i32> [[SAT_UPTO0]], i32 [[SAT_I1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[SAT]] ; %sat = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %x) diff --git a/llvm/test/Transforms/Scalarizer/min-bits.ll b/llvm/test/Transforms/Scalarizer/min-bits.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Scalarizer/min-bits.ll @@ -0,0 +1,998 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s --check-prefixes=CHECK,MIN_LE16,DEFAULT +; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -scalarize-min-bits=16 -S | FileCheck %s --check-prefixes=CHECK,MIN_LE16,MIN_GE16 +; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -scalarize-min-bits=32 -S | FileCheck %s --check-prefixes=CHECK,MIN_GE16,MIN32 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +define void @load_add_store_v2i16(ptr %pa, ptr %pb) { +; MIN_LE16-LABEL: @load_add_store_v2i16( +; MIN_LE16-NEXT: [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1 +; MIN_LE16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8 +; MIN_LE16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1 +; MIN_LE16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 +; MIN_LE16-NEXT: [[B_I0:%.*]] = load i16, ptr [[PB]], align 8 +; MIN_LE16-NEXT: [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2 +; MIN_LE16-NEXT: [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]] +; MIN_LE16-NEXT: [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]] +; MIN_LE16-NEXT: store i16 [[C_I0]], ptr [[PA]], align 8 +; MIN_LE16-NEXT: store i16 [[C_I1]], ptr [[PA_I1]], align 2 +; MIN_LE16-NEXT: ret void +; +; MIN32-LABEL: @load_add_store_v2i16( +; MIN32-NEXT: [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8 +; MIN32-NEXT: [[B:%.*]] = load <2 x i16>, ptr [[PB:%.*]], align 8 +; MIN32-NEXT: [[C:%.*]] = add <2 x i16> [[A]], [[B]] +; MIN32-NEXT: store <2 x i16> [[C]], ptr [[PA]], align 8 +; MIN32-NEXT: ret void +; + %a = load <2 x i16>, ptr %pa, align 8 + %b = load <2 x i16>, ptr %pb, align 8 + %c = add <2 x i16> %a, %b + store <2 x i16> %c, ptr %pa, align 8 + ret void +} + +define void @load_add_store_v3i16(ptr %pa, ptr %pb) { +; MIN_LE16-LABEL: @load_add_store_v3i16( +; MIN_LE16-NEXT: [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1 +; MIN_LE16-NEXT: [[PB_I2:%.*]] = getelementptr i16, ptr [[PB]], i32 2 +; MIN_LE16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8 +; MIN_LE16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1 +; MIN_LE16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 +; MIN_LE16-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2 +; MIN_LE16-NEXT: [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4 +; MIN_LE16-NEXT: [[B_I0:%.*]] = load i16, ptr [[PB]], align 8 +; MIN_LE16-NEXT: [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2 +; MIN_LE16-NEXT: [[B_I2:%.*]] = load i16, ptr [[PB_I2]], align 4 +; MIN_LE16-NEXT: [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]] +; MIN_LE16-NEXT: [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]] +; MIN_LE16-NEXT: [[C_I2:%.*]] = add i16 [[A_I2]], [[B_I2]] +; MIN_LE16-NEXT: store i16 [[C_I0]], ptr [[PA]], align 8 +; MIN_LE16-NEXT: store i16 [[C_I1]], ptr [[PA_I1]], align 2 +; MIN_LE16-NEXT: store i16 [[C_I2]], ptr [[PA_I2]], align 4 +; MIN_LE16-NEXT: ret void +; +; MIN32-LABEL: @load_add_store_v3i16( +; MIN32-NEXT: [[PB_I1:%.*]] = getelementptr <2 x i16>, ptr [[PB:%.*]], i32 1 +; MIN32-NEXT: [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8 +; MIN32-NEXT: [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1 +; MIN32-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 4 +; MIN32-NEXT: [[B_I0:%.*]] = load <2 x i16>, ptr [[PB]], align 8 +; MIN32-NEXT: [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 4 +; MIN32-NEXT: [[C_I0:%.*]] = add <2 x i16> [[A_I0]], [[B_I0]] +; MIN32-NEXT: [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]] +; MIN32-NEXT: store <2 x i16> [[C_I0]], ptr [[PA]], align 8 +; MIN32-NEXT: store i16 [[C_I1]], ptr [[PA_I1]], align 4 +; MIN32-NEXT: ret void +; + %a = load <3 x i16>, ptr %pa, align 8 + %b = load <3 x i16>, ptr %pb, align 8 + %c = add <3 x i16> %a, %b + store <3 x i16> %c, ptr %pa, align 8 + ret void +} + +define void @load_add_store_v4i16(ptr %pa, ptr %pb) { +; MIN_LE16-LABEL: @load_add_store_v4i16( +; MIN_LE16-NEXT: [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1 +; MIN_LE16-NEXT: [[PB_I2:%.*]] = getelementptr i16, ptr [[PB]], i32 2 +; MIN_LE16-NEXT: [[PB_I3:%.*]] = getelementptr i16, ptr [[PB]], i32 3 +; MIN_LE16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8 +; MIN_LE16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1 +; MIN_LE16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 +; MIN_LE16-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2 +; MIN_LE16-NEXT: [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4 +; MIN_LE16-NEXT: [[PA_I3:%.*]] = getelementptr i16, ptr [[PA]], i32 3 +; MIN_LE16-NEXT: [[A_I3:%.*]] = load i16, ptr [[PA_I3]], align 2 +; MIN_LE16-NEXT: [[B_I0:%.*]] = load i16, ptr [[PB]], align 8 +; MIN_LE16-NEXT: [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2 +; MIN_LE16-NEXT: [[B_I2:%.*]] = load i16, ptr [[PB_I2]], align 4 +; MIN_LE16-NEXT: [[B_I3:%.*]] = load i16, ptr [[PB_I3]], align 2 +; MIN_LE16-NEXT: [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]] +; MIN_LE16-NEXT: [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]] +; MIN_LE16-NEXT: [[C_I2:%.*]] = add i16 [[A_I2]], [[B_I2]] +; MIN_LE16-NEXT: [[C_I3:%.*]] = add i16 [[A_I3]], [[B_I3]] +; MIN_LE16-NEXT: store i16 [[C_I0]], ptr [[PA]], align 8 +; MIN_LE16-NEXT: store i16 [[C_I1]], ptr [[PA_I1]], align 2 +; MIN_LE16-NEXT: store i16 [[C_I2]], ptr [[PA_I2]], align 4 +; MIN_LE16-NEXT: store i16 [[C_I3]], ptr [[PA_I3]], align 2 +; MIN_LE16-NEXT: ret void +; +; MIN32-LABEL: @load_add_store_v4i16( +; MIN32-NEXT: [[PB_I1:%.*]] = getelementptr <2 x i16>, ptr [[PB:%.*]], i32 1 +; MIN32-NEXT: [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8 +; MIN32-NEXT: [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1 +; MIN32-NEXT: [[A_I1:%.*]] = load <2 x i16>, ptr [[PA_I1]], align 4 +; MIN32-NEXT: [[B_I0:%.*]] = load <2 x i16>, ptr [[PB]], align 8 +; MIN32-NEXT: [[B_I1:%.*]] = load <2 x i16>, ptr [[PB_I1]], align 4 +; MIN32-NEXT: [[C_I0:%.*]] = add <2 x i16> [[A_I0]], [[B_I0]] +; MIN32-NEXT: [[C_I1:%.*]] = add <2 x i16> [[A_I1]], [[B_I1]] +; MIN32-NEXT: store <2 x i16> [[C_I0]], ptr [[PA]], align 8 +; MIN32-NEXT: store <2 x i16> [[C_I1]], ptr [[PA_I1]], align 4 +; MIN32-NEXT: ret void +; + %a = load <4 x i16>, ptr %pa, align 8 + %b = load <4 x i16>, ptr %pb, align 8 + %c = add <4 x i16> %a, %b + store <4 x i16> %c, ptr %pa, align 8 + ret void +} + +define <2 x half> @select_uniform_condition_v2f16(<2 x half> %a, <2 x half> %b, i1 %cc) { +; MIN_LE16-LABEL: @select_uniform_condition_v2f16( +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: [[B_I0:%.*]] = extractelement <2 x half> [[B:%.*]], i64 0 +; MIN_LE16-NEXT: [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]] +; MIN_LE16-NEXT: [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1 +; MIN_LE16-NEXT: [[B_I1:%.*]] = extractelement <2 x half> [[B]], i64 1 +; MIN_LE16-NEXT: [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]] +; MIN_LE16-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0 +; MIN_LE16-NEXT: [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; MIN_LE16-NEXT: ret <2 x half> [[R]] +; +; MIN32-LABEL: @select_uniform_condition_v2f16( +; MIN32-NEXT: [[R:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A:%.*]], <2 x half> [[B:%.*]] +; MIN32-NEXT: ret <2 x half> [[R]] +; + %r = select i1 %cc, <2 x half> %a, <2 x half> %b + ret <2 x half> %r +} + +define <3 x half> @select_uniform_condition_v3f16(<3 x half> %a, <3 x half> %b, i1 %cc) { +; MIN_LE16-LABEL: @select_uniform_condition_v3f16( +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: [[B_I0:%.*]] = extractelement <3 x half> [[B:%.*]], i64 0 +; MIN_LE16-NEXT: [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]] +; MIN_LE16-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1 +; MIN_LE16-NEXT: [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 1 +; MIN_LE16-NEXT: [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]] +; MIN_LE16-NEXT: [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2 +; MIN_LE16-NEXT: [[B_I2:%.*]] = extractelement <3 x half> [[B]], i64 2 +; MIN_LE16-NEXT: [[R_I2:%.*]] = select i1 [[CC]], half [[A_I2]], half [[B_I2]] +; MIN_LE16-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0 +; MIN_LE16-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; MIN_LE16-NEXT: [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2 +; MIN_LE16-NEXT: ret <3 x half> [[R]] +; +; MIN32-LABEL: @select_uniform_condition_v3f16( +; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> +; MIN32-NEXT: [[B_I0:%.*]] = shufflevector <3 x half> [[B:%.*]], <3 x half> poison, <2 x i32> +; MIN32-NEXT: [[R_I0:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A_I0]], <2 x half> [[B_I0]] +; MIN32-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2 +; MIN32-NEXT: [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 2 +; MIN32-NEXT: [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]] +; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> +; MIN32-NEXT: [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2 +; MIN32-NEXT: ret <3 x half> [[R]] +; + %r = select i1 %cc, <3 x half> %a, <3 x half> %b + ret <3 x half> %r +} + +define <4 x half> @select_uniform_condition_v4f16(<4 x half> %a, <4 x half> %b, i1 %cc) { +; MIN_LE16-LABEL: @select_uniform_condition_v4f16( +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0 +; MIN_LE16-NEXT: [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]] +; MIN_LE16-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1 +; MIN_LE16-NEXT: [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1 +; MIN_LE16-NEXT: [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]] +; MIN_LE16-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2 +; MIN_LE16-NEXT: [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2 +; MIN_LE16-NEXT: [[R_I2:%.*]] = select i1 [[CC]], half [[A_I2]], half [[B_I2]] +; MIN_LE16-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3 +; MIN_LE16-NEXT: [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3 +; MIN_LE16-NEXT: [[R_I3:%.*]] = select i1 [[CC]], half [[A_I3]], half [[B_I3]] +; MIN_LE16-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0 +; MIN_LE16-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; MIN_LE16-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2 +; MIN_LE16-NEXT: [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3 +; MIN_LE16-NEXT: ret <4 x half> [[R]] +; +; MIN32-LABEL: @select_uniform_condition_v4f16( +; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> +; MIN32-NEXT: [[B_I0:%.*]] = shufflevector <4 x half> [[B:%.*]], <4 x half> poison, <2 x i32> +; MIN32-NEXT: [[R_I0:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A_I0]], <2 x half> [[B_I0]] +; MIN32-NEXT: [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> +; MIN32-NEXT: [[B_I1:%.*]] = shufflevector <4 x half> [[B]], <4 x half> poison, <2 x i32> +; MIN32-NEXT: [[R_I1:%.*]] = select i1 [[CC]], <2 x half> [[A_I1]], <2 x half> [[B_I1]] +; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> +; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> +; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> +; MIN32-NEXT: ret <4 x half> [[R]] +; + %r = select i1 %cc, <4 x half> %a, <4 x half> %b + ret <4 x half> %r +} + +define <4 x half> @select_vector_condition_v4f16(<4 x half> %a, <4 x half> %b, <4 x i1> %cc) { +; DEFAULT-LABEL: @select_vector_condition_v4f16( +; DEFAULT-NEXT: [[CC_I0:%.*]] = extractelement <4 x i1> [[CC:%.*]], i64 0 +; DEFAULT-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 +; DEFAULT-NEXT: [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0 +; DEFAULT-NEXT: [[R_I0:%.*]] = select i1 [[CC_I0]], half [[A_I0]], half [[B_I0]] +; DEFAULT-NEXT: [[CC_I1:%.*]] = extractelement <4 x i1> [[CC]], i64 1 +; DEFAULT-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1 +; DEFAULT-NEXT: [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1 +; DEFAULT-NEXT: [[R_I1:%.*]] = select i1 [[CC_I1]], half [[A_I1]], half [[B_I1]] +; DEFAULT-NEXT: [[CC_I2:%.*]] = extractelement <4 x i1> [[CC]], i64 2 +; DEFAULT-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2 +; DEFAULT-NEXT: [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2 +; DEFAULT-NEXT: [[R_I2:%.*]] = select i1 [[CC_I2]], half [[A_I2]], half [[B_I2]] +; DEFAULT-NEXT: [[CC_I3:%.*]] = extractelement <4 x i1> [[CC]], i64 3 +; DEFAULT-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3 +; DEFAULT-NEXT: [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3 +; DEFAULT-NEXT: [[R_I3:%.*]] = select i1 [[CC_I3]], half [[A_I3]], half [[B_I3]] +; DEFAULT-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0 +; DEFAULT-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; DEFAULT-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2 +; DEFAULT-NEXT: [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3 +; DEFAULT-NEXT: ret <4 x half> [[R]] +; +; MIN_GE16-LABEL: @select_vector_condition_v4f16( +; MIN_GE16-NEXT: [[R:%.*]] = select <4 x i1> [[CC:%.*]], <4 x half> [[A:%.*]], <4 x half> [[B:%.*]] +; MIN_GE16-NEXT: ret <4 x half> [[R]] +; + %r = select <4 x i1> %cc, <4 x half> %a, <4 x half> %b + ret <4 x half> %r +} + +define <2 x half> @unary_v2f16(<2 x half> %a) { +; MIN_LE16-LABEL: @unary_v2f16( +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: [[R_I0:%.*]] = fneg half [[A_I0]] +; MIN_LE16-NEXT: [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1 +; MIN_LE16-NEXT: [[R_I1:%.*]] = fneg half [[A_I1]] +; MIN_LE16-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0 +; MIN_LE16-NEXT: [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; MIN_LE16-NEXT: ret <2 x half> [[R]] +; +; MIN32-LABEL: @unary_v2f16( +; MIN32-NEXT: [[R:%.*]] = fneg <2 x half> [[A:%.*]] +; MIN32-NEXT: ret <2 x half> [[R]] +; + %r = fneg <2 x half> %a + ret <2 x half> %r +} + +define <3 x half> @unary_v3f16(<3 x half> %a) { +; MIN_LE16-LABEL: @unary_v3f16( +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: [[R_I0:%.*]] = fneg half [[A_I0]] +; MIN_LE16-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1 +; MIN_LE16-NEXT: [[R_I1:%.*]] = fneg half [[A_I1]] +; MIN_LE16-NEXT: [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2 +; MIN_LE16-NEXT: [[R_I2:%.*]] = fneg half [[A_I2]] +; MIN_LE16-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0 +; MIN_LE16-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; MIN_LE16-NEXT: [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2 +; MIN_LE16-NEXT: ret <3 x half> [[R]] +; +; MIN32-LABEL: @unary_v3f16( +; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> +; MIN32-NEXT: [[R_I0:%.*]] = fneg <2 x half> [[A_I0]] +; MIN32-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2 +; MIN32-NEXT: [[R_I1:%.*]] = fneg half [[A_I1]] +; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> +; MIN32-NEXT: [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2 +; MIN32-NEXT: ret <3 x half> [[R]] +; + %r = fneg <3 x half> %a + ret <3 x half> %r +} + +define <4 x half> @unary_v4f16(<4 x half> %a) { +; MIN_LE16-LABEL: @unary_v4f16( +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: [[R_I0:%.*]] = fneg half [[A_I0]] +; MIN_LE16-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1 +; MIN_LE16-NEXT: [[R_I1:%.*]] = fneg half [[A_I1]] +; MIN_LE16-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2 +; MIN_LE16-NEXT: [[R_I2:%.*]] = fneg half [[A_I2]] +; MIN_LE16-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3 +; MIN_LE16-NEXT: [[R_I3:%.*]] = fneg half [[A_I3]] +; MIN_LE16-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0 +; MIN_LE16-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; MIN_LE16-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2 +; MIN_LE16-NEXT: [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3 +; MIN_LE16-NEXT: ret <4 x half> [[R]] +; +; MIN32-LABEL: @unary_v4f16( +; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> +; MIN32-NEXT: [[R_I0:%.*]] = fneg <2 x half> [[A_I0]] +; MIN32-NEXT: [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> +; MIN32-NEXT: [[R_I1:%.*]] = fneg <2 x half> [[A_I1]] +; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> +; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> +; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> +; MIN32-NEXT: ret <4 x half> [[R]] +; + %r = fneg <4 x half> %a + ret <4 x half> %r +} + +define <2 x i16> @fptosi_v2f16(<2 x half> %a) { +; MIN_LE16-LABEL: @fptosi_v2f16( +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: [[R_I0:%.*]] = fptosi half [[A_I0]] to i16 +; MIN_LE16-NEXT: [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1 +; MIN_LE16-NEXT: [[R_I1:%.*]] = fptosi half [[A_I1]] to i16 +; MIN_LE16-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x i16> poison, i16 [[R_I0]], i64 0 +; MIN_LE16-NEXT: [[R:%.*]] = insertelement <2 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1 +; MIN_LE16-NEXT: ret <2 x i16> [[R]] +; +; MIN32-LABEL: @fptosi_v2f16( +; MIN32-NEXT: [[R:%.*]] = fptosi <2 x half> [[A:%.*]] to <2 x i16> +; MIN32-NEXT: ret <2 x i16> [[R]] +; + %r = fptosi <2 x half> %a to <2 x i16> + ret <2 x i16> %r +} + +define <3 x i16> @fptosi_v3f16(<3 x half> %a) { +; MIN_LE16-LABEL: @fptosi_v3f16( +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: [[R_I0:%.*]] = fptosi half [[A_I0]] to i16 +; MIN_LE16-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1 +; MIN_LE16-NEXT: [[R_I1:%.*]] = fptosi half [[A_I1]] to i16 +; MIN_LE16-NEXT: [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2 +; MIN_LE16-NEXT: [[R_I2:%.*]] = fptosi half [[A_I2]] to i16 +; MIN_LE16-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x i16> poison, i16 [[R_I0]], i64 0 +; MIN_LE16-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1 +; MIN_LE16-NEXT: [[R:%.*]] = insertelement <3 x i16> [[R_UPTO1]], i16 [[R_I2]], i64 2 +; MIN_LE16-NEXT: ret <3 x i16> [[R]] +; +; MIN32-LABEL: @fptosi_v3f16( +; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> +; MIN32-NEXT: [[R_I0:%.*]] = fptosi <2 x half> [[A_I0]] to <2 x i16> +; MIN32-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2 +; MIN32-NEXT: [[R_I1:%.*]] = fptosi half [[A_I1]] to i16 +; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x i16> [[R_I0]], <2 x i16> [[R_I0]], <3 x i32> +; MIN32-NEXT: [[R:%.*]] = insertelement <3 x i16> [[TMP1]], i16 [[R_I1]], i64 2 +; MIN32-NEXT: ret <3 x i16> [[R]] +; + %r = fptosi <3 x half> %a to <3 x i16> + ret <3 x i16> %r +} + +define <4 x i16> @fptosi_v4f16(<4 x half> %a) { +; MIN_LE16-LABEL: @fptosi_v4f16( +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: [[R_I0:%.*]] = fptosi half [[A_I0]] to i16 +; MIN_LE16-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1 +; MIN_LE16-NEXT: [[R_I1:%.*]] = fptosi half [[A_I1]] to i16 +; MIN_LE16-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2 +; MIN_LE16-NEXT: [[R_I2:%.*]] = fptosi half [[A_I2]] to i16 +; MIN_LE16-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3 +; MIN_LE16-NEXT: [[R_I3:%.*]] = fptosi half [[A_I3]] to i16 +; MIN_LE16-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x i16> poison, i16 [[R_I0]], i64 0 +; MIN_LE16-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1 +; MIN_LE16-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x i16> [[R_UPTO1]], i16 [[R_I2]], i64 2 +; MIN_LE16-NEXT: [[R:%.*]] = insertelement <4 x i16> [[R_UPTO2]], i16 [[R_I3]], i64 3 +; MIN_LE16-NEXT: ret <4 x i16> [[R]] +; +; MIN32-LABEL: @fptosi_v4f16( +; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> +; MIN32-NEXT: [[R_I0:%.*]] = fptosi <2 x half> [[A_I0]] to <2 x i16> +; MIN32-NEXT: [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> +; MIN32-NEXT: [[R_I1:%.*]] = fptosi <2 x half> [[A_I1]] to <2 x i16> +; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x i16> [[R_I0]], <2 x i16> [[R_I0]], <4 x i32> +; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[R_I1]], <2 x i16> [[R_I1]], <4 x i32> +; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP2]], <4 x i32> +; MIN32-NEXT: ret <4 x i16> [[R]] +; + %r = fptosi <4 x half> %a to <4 x i16> + ret <4 x i16> %r +} + +define <4 x float> @fpext_v4f16(<4 x half> %a) { +; MIN_LE16-LABEL: @fpext_v4f16( +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: [[R_I0:%.*]] = fpext half [[A_I0]] to float +; MIN_LE16-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1 +; MIN_LE16-NEXT: [[R_I1:%.*]] = fpext half [[A_I1]] to float +; MIN_LE16-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2 +; MIN_LE16-NEXT: [[R_I2:%.*]] = fpext half [[A_I2]] to float +; MIN_LE16-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3 +; MIN_LE16-NEXT: [[R_I3:%.*]] = fpext half [[A_I3]] to float +; MIN_LE16-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x float> poison, float [[R_I0]], i64 0 +; MIN_LE16-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x float> [[R_UPTO0]], float [[R_I1]], i64 1 +; MIN_LE16-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x float> [[R_UPTO1]], float [[R_I2]], i64 2 +; MIN_LE16-NEXT: [[R:%.*]] = insertelement <4 x float> [[R_UPTO2]], float [[R_I3]], i64 3 +; MIN_LE16-NEXT: ret <4 x float> [[R]] +; +; MIN32-LABEL: @fpext_v4f16( +; MIN32-NEXT: [[R:%.*]] = fpext <4 x half> [[A:%.*]] to <4 x float> +; MIN32-NEXT: ret <4 x float> [[R]] +; + %r = fpext <4 x half> %a to <4 x float> + ret <4 x float> %r +} + +define <4 x i1> @icmp_v4f16(<4 x i16> %a, <4 x i16> %b) { +; DEFAULT-LABEL: @icmp_v4f16( +; DEFAULT-NEXT: [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0 +; DEFAULT-NEXT: [[B_I0:%.*]] = extractelement <4 x i16> [[B:%.*]], i64 0 +; DEFAULT-NEXT: [[R_I0:%.*]] = icmp ugt i16 [[A_I0]], [[B_I0]] +; DEFAULT-NEXT: [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1 +; DEFAULT-NEXT: [[B_I1:%.*]] = extractelement <4 x i16> [[B]], i64 1 +; DEFAULT-NEXT: [[R_I1:%.*]] = icmp ugt i16 [[A_I1]], [[B_I1]] +; DEFAULT-NEXT: [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2 +; DEFAULT-NEXT: [[B_I2:%.*]] = extractelement <4 x i16> [[B]], i64 2 +; DEFAULT-NEXT: [[R_I2:%.*]] = icmp ugt i16 [[A_I2]], [[B_I2]] +; DEFAULT-NEXT: [[A_I3:%.*]] = extractelement <4 x i16> [[A]], i64 3 +; DEFAULT-NEXT: [[B_I3:%.*]] = extractelement <4 x i16> [[B]], i64 3 +; DEFAULT-NEXT: [[R_I3:%.*]] = icmp ugt i16 [[A_I3]], [[B_I3]] +; DEFAULT-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x i1> poison, i1 [[R_I0]], i64 0 +; DEFAULT-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x i1> [[R_UPTO0]], i1 [[R_I1]], i64 1 +; DEFAULT-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x i1> [[R_UPTO1]], i1 [[R_I2]], i64 2 +; DEFAULT-NEXT: [[R:%.*]] = insertelement <4 x i1> [[R_UPTO2]], i1 [[R_I3]], i64 3 +; DEFAULT-NEXT: ret <4 x i1> [[R]] +; +; MIN_GE16-LABEL: @icmp_v4f16( +; MIN_GE16-NEXT: [[R:%.*]] = icmp ugt <4 x i16> [[A:%.*]], [[B:%.*]] +; MIN_GE16-NEXT: ret <4 x i1> [[R]] +; + %r = icmp ugt <4 x i16> %a, %b + ret <4 x i1> %r +} + +define <4 x ptr> @gep1_v4(ptr %base, <4 x i16> %a) { +; MIN_LE16-LABEL: @gep1_v4( +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: [[P_I0:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i16 [[A_I0]] +; MIN_LE16-NEXT: [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1 +; MIN_LE16-NEXT: [[P_I1:%.*]] = getelementptr i32, ptr [[BASE]], i16 [[A_I1]] +; MIN_LE16-NEXT: [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2 +; MIN_LE16-NEXT: [[P_I2:%.*]] = getelementptr i32, ptr [[BASE]], i16 [[A_I2]] +; MIN_LE16-NEXT: [[A_I3:%.*]] = extractelement <4 x i16> [[A]], i64 3 +; MIN_LE16-NEXT: [[P_I3:%.*]] = getelementptr i32, ptr [[BASE]], i16 [[A_I3]] +; MIN_LE16-NEXT: [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0 +; MIN_LE16-NEXT: [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1 +; MIN_LE16-NEXT: [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2 +; MIN_LE16-NEXT: [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3 +; MIN_LE16-NEXT: ret <4 x ptr> [[P]] +; +; MIN32-LABEL: @gep1_v4( +; MIN32-NEXT: [[P:%.*]] = getelementptr i32, ptr [[BASE:%.*]], <4 x i16> [[A:%.*]] +; MIN32-NEXT: ret <4 x ptr> [[P]] +; + %p = getelementptr i32, ptr %base, <4 x i16> %a + ret <4 x ptr> %p +} + +define <4 x ptr> @gep2_v4(<4 x ptr> %base, i16 %a) { +; CHECK-LABEL: @gep2_v4( +; CHECK-NEXT: [[BASE_I0:%.*]] = extractelement <4 x ptr> [[BASE:%.*]], i64 0 +; CHECK-NEXT: [[P_I0:%.*]] = getelementptr i32, ptr [[BASE_I0]], i16 [[A:%.*]] +; CHECK-NEXT: [[BASE_I1:%.*]] = extractelement <4 x ptr> [[BASE]], i64 1 +; CHECK-NEXT: [[P_I1:%.*]] = getelementptr i32, ptr [[BASE_I1]], i16 [[A]] +; CHECK-NEXT: [[BASE_I2:%.*]] = extractelement <4 x ptr> [[BASE]], i64 2 +; CHECK-NEXT: [[P_I2:%.*]] = getelementptr i32, ptr [[BASE_I2]], i16 [[A]] +; CHECK-NEXT: [[BASE_I3:%.*]] = extractelement <4 x ptr> [[BASE]], i64 3 +; CHECK-NEXT: [[P_I3:%.*]] = getelementptr i32, ptr [[BASE_I3]], i16 [[A]] +; CHECK-NEXT: [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0 +; CHECK-NEXT: [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1 +; CHECK-NEXT: [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2 +; CHECK-NEXT: [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3 +; CHECK-NEXT: ret <4 x ptr> [[P]] +; + %p = getelementptr i32, <4 x ptr> %base, i16 %a + ret <4 x ptr> %p +} + +define <4 x ptr> @gep3_v4(<4 x ptr> %base, <4 x i16> %a) { +; MIN_LE16-LABEL: @gep3_v4( +; MIN_LE16-NEXT: [[BASE_I0:%.*]] = extractelement <4 x ptr> [[BASE:%.*]], i64 0 +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: [[P_I0:%.*]] = getelementptr i32, ptr [[BASE_I0]], i16 [[A_I0]] +; MIN_LE16-NEXT: [[BASE_I1:%.*]] = extractelement <4 x ptr> [[BASE]], i64 1 +; MIN_LE16-NEXT: [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1 +; MIN_LE16-NEXT: [[P_I1:%.*]] = getelementptr i32, ptr [[BASE_I1]], i16 [[A_I1]] +; MIN_LE16-NEXT: [[BASE_I2:%.*]] = extractelement <4 x ptr> [[BASE]], i64 2 +; MIN_LE16-NEXT: [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2 +; MIN_LE16-NEXT: [[P_I2:%.*]] = getelementptr i32, ptr [[BASE_I2]], i16 [[A_I2]] +; MIN_LE16-NEXT: [[BASE_I3:%.*]] = extractelement <4 x ptr> [[BASE]], i64 3 +; MIN_LE16-NEXT: [[A_I3:%.*]] = extractelement <4 x i16> [[A]], i64 3 +; MIN_LE16-NEXT: [[P_I3:%.*]] = getelementptr i32, ptr [[BASE_I3]], i16 [[A_I3]] +; MIN_LE16-NEXT: [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0 +; MIN_LE16-NEXT: [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1 +; MIN_LE16-NEXT: [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2 +; MIN_LE16-NEXT: [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3 +; MIN_LE16-NEXT: ret <4 x ptr> [[P]] +; +; MIN32-LABEL: @gep3_v4( +; MIN32-NEXT: [[P:%.*]] = getelementptr i32, <4 x ptr> [[BASE:%.*]], <4 x i16> [[A:%.*]] +; MIN32-NEXT: ret <4 x ptr> [[P]] +; + %p = getelementptr i32, <4 x ptr> %base, <4 x i16> %a + ret <4 x ptr> %p +} + +define void @insertelement_v2i16(ptr %p, <2 x i16> %a, i16 %b) { +; MIN_LE16-LABEL: @insertelement_v2i16( +; MIN_LE16-NEXT: [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1 +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <2 x i16> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: store i16 [[A_I0]], ptr [[P]], align 4 +; MIN_LE16-NEXT: store i16 [[B:%.*]], ptr [[P_I1]], align 2 +; MIN_LE16-NEXT: ret void +; +; MIN32-LABEL: @insertelement_v2i16( +; MIN32-NEXT: [[R:%.*]] = insertelement <2 x i16> [[A:%.*]], i16 [[B:%.*]], i64 1 +; MIN32-NEXT: store <2 x i16> [[R]], ptr [[P:%.*]], align 4 +; MIN32-NEXT: ret void +; + %r = insertelement <2 x i16> %a, i16 %b, i64 1 + store <2 x i16> %r, ptr %p + ret void +} + +define void @insertelement_v3i16(ptr %p, <3 x i16> %a, i16 %b) { +; MIN_LE16-LABEL: @insertelement_v3i16( +; MIN_LE16-NEXT: [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1 +; MIN_LE16-NEXT: [[P_I2:%.*]] = getelementptr i16, ptr [[P]], i32 2 +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <3 x i16> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: [[A_I1:%.*]] = extractelement <3 x i16> [[A]], i64 1 +; MIN_LE16-NEXT: store i16 [[A_I0]], ptr [[P]], align 8 +; MIN_LE16-NEXT: store i16 [[A_I1]], ptr [[P_I1]], align 2 +; MIN_LE16-NEXT: store i16 [[B:%.*]], ptr [[P_I2]], align 4 +; MIN_LE16-NEXT: ret void +; +; MIN32-LABEL: @insertelement_v3i16( +; MIN32-NEXT: [[P_I1:%.*]] = getelementptr <2 x i16>, ptr [[P:%.*]], i32 1 +; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <3 x i16> [[A:%.*]], <3 x i16> poison, <2 x i32> +; MIN32-NEXT: store <2 x i16> [[A_I0]], ptr [[P]], align 8 +; MIN32-NEXT: store i16 [[B:%.*]], ptr [[P_I1]], align 4 +; MIN32-NEXT: ret void +; + %r = insertelement <3 x i16> %a, i16 %b, i64 2 + store <3 x i16> %r, ptr %p + ret void +} + +define void @insertelement_v4i16(ptr %p, <4 x i16> %a, i16 %b) { +; MIN_LE16-LABEL: @insertelement_v4i16( +; MIN_LE16-NEXT: [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1 +; MIN_LE16-NEXT: [[P_I2:%.*]] = getelementptr i16, ptr [[P]], i32 2 +; MIN_LE16-NEXT: [[P_I3:%.*]] = getelementptr i16, ptr [[P]], i32 3 +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1 +; MIN_LE16-NEXT: [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2 +; MIN_LE16-NEXT: store i16 [[A_I0]], ptr [[P]], align 8 +; MIN_LE16-NEXT: store i16 [[A_I1]], ptr [[P_I1]], align 2 +; MIN_LE16-NEXT: store i16 [[A_I2]], ptr [[P_I2]], align 4 +; MIN_LE16-NEXT: store i16 [[B:%.*]], ptr [[P_I3]], align 2 +; MIN_LE16-NEXT: ret void +; +; MIN32-LABEL: @insertelement_v4i16( +; MIN32-NEXT: [[P_I1:%.*]] = getelementptr <2 x i16>, ptr [[P:%.*]], i32 1 +; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <4 x i16> [[A:%.*]], <4 x i16> poison, <2 x i32> +; MIN32-NEXT: [[A_I1:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> poison, <2 x i32> +; MIN32-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> [[A_I1]], i16 [[B:%.*]], i64 1 +; MIN32-NEXT: store <2 x i16> [[A_I0]], ptr [[P]], align 8 +; MIN32-NEXT: store <2 x i16> [[TMP1]], ptr [[P_I1]], align 4 +; MIN32-NEXT: ret void +; + %r = insertelement <4 x i16> %a, i16 %b, i64 3 + store <4 x i16> %r, ptr %p + ret void +} + +define <2 x i16> @load_insertelement_v2i16(ptr %pa, i16 %b) { +; MIN_LE16-LABEL: @load_insertelement_v2i16( +; MIN_LE16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 4 +; MIN_LE16-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x i16> poison, i16 [[A_I0]], i64 0 +; MIN_LE16-NEXT: [[R:%.*]] = insertelement <2 x i16> [[R_UPTO0]], i16 [[B:%.*]], i64 1 +; MIN_LE16-NEXT: ret <2 x i16> [[R]] +; +; MIN32-LABEL: @load_insertelement_v2i16( +; MIN32-NEXT: [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 4 +; MIN32-NEXT: [[R:%.*]] = insertelement <2 x i16> [[A]], i16 [[B:%.*]], i64 1 +; MIN32-NEXT: ret <2 x i16> [[R]] +; + %a = load <2 x i16>, ptr %pa + %r = insertelement <2 x i16> %a, i16 %b, i64 1 + ret <2 x i16> %r +} + +define <3 x i16> @load_insertelement_v3i16(ptr %pa, i16 %b) { +; MIN_LE16-LABEL: @load_insertelement_v3i16( +; MIN_LE16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8 +; MIN_LE16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1 +; MIN_LE16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 +; MIN_LE16-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x i16> poison, i16 [[A_I0]], i64 0 +; MIN_LE16-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x i16> [[R_UPTO0]], i16 [[A_I1]], i64 1 +; MIN_LE16-NEXT: [[R:%.*]] = insertelement <3 x i16> [[R_UPTO1]], i16 [[B:%.*]], i64 2 +; MIN_LE16-NEXT: ret <3 x i16> [[R]] +; +; MIN32-LABEL: @load_insertelement_v3i16( +; MIN32-NEXT: [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8 +; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <3 x i32> +; MIN32-NEXT: [[R:%.*]] = insertelement <3 x i16> [[TMP1]], i16 [[B:%.*]], i64 2 +; MIN32-NEXT: ret <3 x i16> [[R]] +; + %a = load <3 x i16>, ptr %pa + %r = insertelement <3 x i16> %a, i16 %b, i64 2 + ret <3 x i16> %r +} + +define <4 x i16> @load_insertelement_v4i16(ptr %pa, i16 %b) { +; MIN_LE16-LABEL: @load_insertelement_v4i16( +; MIN_LE16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8 +; MIN_LE16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1 +; MIN_LE16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 +; MIN_LE16-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2 +; MIN_LE16-NEXT: [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4 +; MIN_LE16-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x i16> poison, i16 [[A_I0]], i64 0 +; MIN_LE16-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x i16> [[R_UPTO0]], i16 [[A_I1]], i64 1 +; MIN_LE16-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x i16> [[R_UPTO1]], i16 [[A_I2]], i64 2 +; MIN_LE16-NEXT: [[R:%.*]] = insertelement <4 x i16> [[R_UPTO2]], i16 [[B:%.*]], i64 3 +; MIN_LE16-NEXT: ret <4 x i16> [[R]] +; +; MIN32-LABEL: @load_insertelement_v4i16( +; MIN32-NEXT: [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8 +; MIN32-NEXT: [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1 +; MIN32-NEXT: [[A_I1:%.*]] = load <2 x i16>, ptr [[PA_I1]], align 4 +; MIN32-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> [[A_I1]], i16 [[B:%.*]], i64 1 +; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <4 x i32> +; MIN32-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> [[TMP1]], <4 x i32> +; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> +; MIN32-NEXT: ret <4 x i16> [[R]] +; + %a = load <4 x i16>, ptr %pa + %r = insertelement <4 x i16> %a, i16 %b, i64 3 + ret <4 x i16> %r +} + +define void @shufflevector_grow(ptr %pa, ptr %pb) { +; MIN_LE16-LABEL: @shufflevector_grow( +; MIN_LE16-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA:%.*]], i32 2 +; MIN_LE16-NEXT: [[PA_I3:%.*]] = getelementptr i16, ptr [[PA]], i32 3 +; MIN_LE16-NEXT: [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1 +; MIN_LE16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA]], align 4 +; MIN_LE16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1 +; MIN_LE16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 +; MIN_LE16-NEXT: [[B_I0:%.*]] = load i16, ptr [[PB]], align 4 +; MIN_LE16-NEXT: [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2 +; MIN_LE16-NEXT: store i16 [[A_I0]], ptr [[PA]], align 8 +; MIN_LE16-NEXT: store i16 [[A_I1]], ptr [[PA_I1]], align 2 +; MIN_LE16-NEXT: store i16 [[B_I0]], ptr [[PA_I2]], align 4 +; MIN_LE16-NEXT: store i16 [[B_I1]], ptr [[PA_I3]], align 2 +; MIN_LE16-NEXT: ret void +; +; MIN32-LABEL: @shufflevector_grow( +; MIN32-NEXT: [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA:%.*]], i32 1 +; MIN32-NEXT: [[A:%.*]] = load <2 x i16>, ptr [[PA]], align 4 +; MIN32-NEXT: [[B:%.*]] = load <2 x i16>, ptr [[PB:%.*]], align 4 +; MIN32-NEXT: [[R:%.*]] = shufflevector <2 x i16> [[A]], <2 x i16> [[B]], <4 x i32> +; MIN32-NEXT: [[R_I0:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> poison, <2 x i32> +; MIN32-NEXT: store <2 x i16> [[R_I0]], ptr [[PA]], align 8 +; MIN32-NEXT: [[R_I1:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> poison, <2 x i32> +; MIN32-NEXT: store <2 x i16> [[R_I1]], ptr [[PA_I1]], align 4 +; MIN32-NEXT: ret void +; + %a = load <2 x i16>, ptr %pa + %b = load <2 x i16>, ptr %pb + %r = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> + store <4 x i16> %r, ptr %pa + ret void +} + +define void @shufflevector_shrink(ptr %pa) { +; MIN_LE16-LABEL: @shufflevector_shrink( +; MIN_LE16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA:%.*]], i32 1 +; MIN_LE16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 +; MIN_LE16-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2 +; MIN_LE16-NEXT: [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4 +; MIN_LE16-NEXT: store i16 [[A_I1]], ptr [[PA]], align 4 +; MIN_LE16-NEXT: store i16 [[A_I2]], ptr [[PA_I1]], align 2 +; MIN_LE16-NEXT: ret void +; +; MIN32-LABEL: @shufflevector_shrink( +; MIN32-NEXT: [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8 +; MIN32-NEXT: [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1 +; MIN32-NEXT: [[A_I1:%.*]] = load <2 x i16>, ptr [[PA_I1]], align 4 +; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <4 x i32> +; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[A_I1]], <2 x i16> [[A_I1]], <4 x i32> +; MIN32-NEXT: [[A:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP2]], <4 x i32> +; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> poison, <2 x i32> +; MIN32-NEXT: store <2 x i16> [[R]], ptr [[PA]], align 4 +; MIN32-NEXT: ret void +; + %a = load <4 x i16>, ptr %pa + %r = shufflevector <4 x i16> %a, <4 x i16> poison, <2 x i32> + store <2 x i16> %r, ptr %pa + ret void +} + +define void @phi_v2f16(ptr %base, i64 %bound) { +; MIN_LE16-LABEL: @phi_v2f16( +; MIN_LE16-NEXT: entry: +; MIN_LE16-NEXT: [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1 +; MIN_LE16-NEXT: br label [[LOOP:%.*]] +; MIN_LE16: loop: +; MIN_LE16-NEXT: [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ] +; MIN_LE16-NEXT: [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ] +; MIN_LE16-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] +; MIN_LE16-NEXT: [[P:%.*]] = getelementptr <2 x half>, ptr [[BASE]], i64 [[IDX]] +; MIN_LE16-NEXT: [[A_I0:%.*]] = load half, ptr [[P]], align 2 +; MIN_LE16-NEXT: [[P_I1:%.*]] = getelementptr half, ptr [[P]], i32 1 +; MIN_LE16-NEXT: [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2 +; MIN_LE16-NEXT: [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]] +; MIN_LE16-NEXT: [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]] +; MIN_LE16-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 +; MIN_LE16-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]] +; MIN_LE16-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] +; MIN_LE16: end: +; MIN_LE16-NEXT: store half [[X_NEXT_I0]], ptr [[BASE]], align 4 +; MIN_LE16-NEXT: store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2 +; MIN_LE16-NEXT: ret void +; +; MIN32-LABEL: @phi_v2f16( +; MIN32-NEXT: entry: +; MIN32-NEXT: br label [[LOOP:%.*]] +; MIN32: loop: +; MIN32-NEXT: [[X:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ] +; MIN32-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] +; MIN32-NEXT: [[P:%.*]] = getelementptr <2 x half>, ptr [[BASE:%.*]], i64 [[IDX]] +; MIN32-NEXT: [[A:%.*]] = load <2 x half>, ptr [[P]], align 2 +; MIN32-NEXT: [[X_NEXT]] = fadd <2 x half> [[X]], [[A]] +; MIN32-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 +; MIN32-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]] +; MIN32-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] +; MIN32: end: +; MIN32-NEXT: store <2 x half> [[X_NEXT]], ptr [[BASE]], align 4 +; MIN32-NEXT: ret void +; +entry: + br label %loop + +loop: + %x = phi <2 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ] + %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ] + %p = getelementptr <2 x half>, ptr %base, i64 %idx + %a = load <2 x half>, ptr %p, align 2 + %x.next = fadd <2 x half> %x, %a + %idx.next = add i64 %idx, 1 + %cc = icmp ult i64 %idx.next, %bound + br i1 %cc, label %loop, label %end + +end: + store <2 x half> %x.next, ptr %base + ret void +} + +define void @phi_v3f16(ptr %base, i64 %bound) { +; MIN_LE16-LABEL: @phi_v3f16( +; MIN_LE16-NEXT: entry: +; MIN_LE16-NEXT: [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1 +; MIN_LE16-NEXT: [[BASE_I2:%.*]] = getelementptr half, ptr [[BASE]], i32 2 +; MIN_LE16-NEXT: br label [[LOOP:%.*]] +; MIN_LE16: loop: +; MIN_LE16-NEXT: [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ] +; MIN_LE16-NEXT: [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ] +; MIN_LE16-NEXT: [[X_I2:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I2:%.*]], [[LOOP]] ] +; MIN_LE16-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] +; MIN_LE16-NEXT: [[P:%.*]] = getelementptr <3 x half>, ptr [[BASE]], i64 [[IDX]] +; MIN_LE16-NEXT: [[A_I0:%.*]] = load half, ptr [[P]], align 2 +; MIN_LE16-NEXT: [[P_I1:%.*]] = getelementptr half, ptr [[P]], i32 1 +; MIN_LE16-NEXT: [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2 +; MIN_LE16-NEXT: [[P_I2:%.*]] = getelementptr half, ptr [[P]], i32 2 +; MIN_LE16-NEXT: [[A_I2:%.*]] = load half, ptr [[P_I2]], align 2 +; MIN_LE16-NEXT: [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]] +; MIN_LE16-NEXT: [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]] +; MIN_LE16-NEXT: [[X_NEXT_I2]] = fadd half [[X_I2]], [[A_I2]] +; MIN_LE16-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 +; MIN_LE16-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]] +; MIN_LE16-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] +; MIN_LE16: end: +; MIN_LE16-NEXT: store half [[X_NEXT_I0]], ptr [[BASE]], align 8 +; MIN_LE16-NEXT: store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2 +; MIN_LE16-NEXT: store half [[X_NEXT_I2]], ptr [[BASE_I2]], align 4 +; MIN_LE16-NEXT: ret void +; +; MIN32-LABEL: @phi_v3f16( +; MIN32-NEXT: entry: +; MIN32-NEXT: [[BASE_I1:%.*]] = getelementptr <2 x half>, ptr [[BASE:%.*]], i32 1 +; MIN32-NEXT: br label [[LOOP:%.*]] +; MIN32: loop: +; MIN32-NEXT: [[X_I0:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ] +; MIN32-NEXT: [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ] +; MIN32-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] +; MIN32-NEXT: [[P:%.*]] = getelementptr <3 x half>, ptr [[BASE]], i64 [[IDX]] +; MIN32-NEXT: [[A_I0:%.*]] = load <2 x half>, ptr [[P]], align 2 +; MIN32-NEXT: [[P_I1:%.*]] = getelementptr <2 x half>, ptr [[P]], i32 1 +; MIN32-NEXT: [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2 +; MIN32-NEXT: [[X_NEXT_I0]] = fadd <2 x half> [[X_I0]], [[A_I0]] +; MIN32-NEXT: [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]] +; MIN32-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 +; MIN32-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]] +; MIN32-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] +; MIN32: end: +; MIN32-NEXT: store <2 x half> [[X_NEXT_I0]], ptr [[BASE]], align 8 +; MIN32-NEXT: store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 4 +; MIN32-NEXT: ret void +; +entry: + br label %loop + +loop: + %x = phi <3 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ] + %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ] + %p = getelementptr <3 x half>, ptr %base, i64 %idx + %a = load <3 x half>, ptr %p, align 2 + %x.next = fadd <3 x half> %x, %a + %idx.next = add i64 %idx, 1 + %cc = icmp ult i64 %idx.next, %bound + br i1 %cc, label %loop, label %end + +end: + store <3 x half> %x.next, ptr %base + ret void +} + +define void @phi_v4f16(ptr %base, i64 %bound) { +; MIN_LE16-LABEL: @phi_v4f16( +; MIN_LE16-NEXT: entry: +; MIN_LE16-NEXT: [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1 +; MIN_LE16-NEXT: [[BASE_I2:%.*]] = getelementptr half, ptr [[BASE]], i32 2 +; MIN_LE16-NEXT: [[BASE_I3:%.*]] = getelementptr half, ptr [[BASE]], i32 3 +; MIN_LE16-NEXT: br label [[LOOP:%.*]] +; MIN_LE16: loop: +; MIN_LE16-NEXT: [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ] +; MIN_LE16-NEXT: [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ] +; MIN_LE16-NEXT: [[X_I2:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I2:%.*]], [[LOOP]] ] +; MIN_LE16-NEXT: [[X_I3:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I3:%.*]], [[LOOP]] ] +; MIN_LE16-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] +; MIN_LE16-NEXT: [[P:%.*]] = getelementptr <4 x half>, ptr [[BASE]], i64 [[IDX]] +; MIN_LE16-NEXT: [[A_I0:%.*]] = load half, ptr [[P]], align 2 +; MIN_LE16-NEXT: [[P_I1:%.*]] = getelementptr half, ptr [[P]], i32 1 +; MIN_LE16-NEXT: [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2 +; MIN_LE16-NEXT: [[P_I2:%.*]] = getelementptr half, ptr [[P]], i32 2 +; MIN_LE16-NEXT: [[A_I2:%.*]] = load half, ptr [[P_I2]], align 2 +; MIN_LE16-NEXT: [[P_I3:%.*]] = getelementptr half, ptr [[P]], i32 3 +; MIN_LE16-NEXT: [[A_I3:%.*]] = load half, ptr [[P_I3]], align 2 +; MIN_LE16-NEXT: [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]] +; MIN_LE16-NEXT: [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]] +; MIN_LE16-NEXT: [[X_NEXT_I2]] = fadd half [[X_I2]], [[A_I2]] +; MIN_LE16-NEXT: [[X_NEXT_I3]] = fadd half [[X_I3]], [[A_I3]] +; MIN_LE16-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 +; MIN_LE16-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]] +; MIN_LE16-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] +; MIN_LE16: end: +; MIN_LE16-NEXT: store half [[X_NEXT_I0]], ptr [[BASE]], align 8 +; MIN_LE16-NEXT: store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2 +; MIN_LE16-NEXT: store half [[X_NEXT_I2]], ptr [[BASE_I2]], align 4 +; MIN_LE16-NEXT: store half [[X_NEXT_I3]], ptr [[BASE_I3]], align 2 +; MIN_LE16-NEXT: ret void +; +; MIN32-LABEL: @phi_v4f16( +; MIN32-NEXT: entry: +; MIN32-NEXT: [[BASE_I1:%.*]] = getelementptr <2 x half>, ptr [[BASE:%.*]], i32 1 +; MIN32-NEXT: br label [[LOOP:%.*]] +; MIN32: loop: +; MIN32-NEXT: [[X_I0:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ] +; MIN32-NEXT: [[X_I1:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ] +; MIN32-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] +; MIN32-NEXT: [[P:%.*]] = getelementptr <4 x half>, ptr [[BASE]], i64 [[IDX]] +; MIN32-NEXT: [[A_I0:%.*]] = load <2 x half>, ptr [[P]], align 2 +; MIN32-NEXT: [[P_I1:%.*]] = getelementptr <2 x half>, ptr [[P]], i32 1 +; MIN32-NEXT: [[A_I1:%.*]] = load <2 x half>, ptr [[P_I1]], align 2 +; MIN32-NEXT: [[X_NEXT_I0]] = fadd <2 x half> [[X_I0]], [[A_I0]] +; MIN32-NEXT: [[X_NEXT_I1]] = fadd <2 x half> [[X_I1]], [[A_I1]] +; MIN32-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 +; MIN32-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]] +; MIN32-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] +; MIN32: end: +; MIN32-NEXT: store <2 x half> [[X_NEXT_I0]], ptr [[BASE]], align 8 +; MIN32-NEXT: store <2 x half> [[X_NEXT_I1]], ptr [[BASE_I1]], align 4 +; MIN32-NEXT: ret void +; +entry: + br label %loop + +loop: + %x = phi <4 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ] + %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ] + %p = getelementptr <4 x half>, ptr %base, i64 %idx + %a = load <4 x half>, ptr %p, align 2 + %x.next = fadd <4 x half> %x, %a + %idx.next = add i64 %idx, 1 + %cc = icmp ult i64 %idx.next, %bound + br i1 %cc, label %loop, label %end + +end: + store <4 x half> %x.next, ptr %base + ret void +} + +define <2 x half> @call_v2f16(<2 x half> %a, <2 x half> %b) { +; MIN_LE16-LABEL: @call_v2f16( +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: [[B_I0:%.*]] = extractelement <2 x half> [[B:%.*]], i64 0 +; MIN_LE16-NEXT: [[R_I0:%.*]] = call half @llvm.minnum.f16(half [[A_I0]], half [[B_I0]]) +; MIN_LE16-NEXT: [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1 +; MIN_LE16-NEXT: [[B_I1:%.*]] = extractelement <2 x half> [[B]], i64 1 +; MIN_LE16-NEXT: [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]]) +; MIN_LE16-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0 +; MIN_LE16-NEXT: [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; MIN_LE16-NEXT: ret <2 x half> [[R]] +; +; MIN32-LABEL: @call_v2f16( +; MIN32-NEXT: [[R:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A:%.*]], <2 x half> [[B:%.*]]) +; MIN32-NEXT: ret <2 x half> [[R]] +; + %r = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b) + ret <2 x half> %r +} + +define <3 x half> @call_v3f16(<3 x half> %a, <3 x half> %b) { +; MIN_LE16-LABEL: @call_v3f16( +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: [[B_I0:%.*]] = extractelement <3 x half> [[B:%.*]], i64 0 +; MIN_LE16-NEXT: [[R_I0:%.*]] = call half @llvm.minnum.f16(half [[A_I0]], half [[B_I0]]) +; MIN_LE16-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1 +; MIN_LE16-NEXT: [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 1 +; MIN_LE16-NEXT: [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]]) +; MIN_LE16-NEXT: [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2 +; MIN_LE16-NEXT: [[B_I2:%.*]] = extractelement <3 x half> [[B]], i64 2 +; MIN_LE16-NEXT: [[R_I2:%.*]] = call half @llvm.minnum.f16(half [[A_I2]], half [[B_I2]]) +; MIN_LE16-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0 +; MIN_LE16-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; MIN_LE16-NEXT: [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2 +; MIN_LE16-NEXT: ret <3 x half> [[R]] +; +; MIN32-LABEL: @call_v3f16( +; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> +; MIN32-NEXT: [[B_I0:%.*]] = shufflevector <3 x half> [[B:%.*]], <3 x half> poison, <2 x i32> +; MIN32-NEXT: [[R_I0:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A_I0]], <2 x half> [[B_I0]]) +; MIN32-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2 +; MIN32-NEXT: [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 2 +; MIN32-NEXT: [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]]) +; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> +; MIN32-NEXT: [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2 +; MIN32-NEXT: ret <3 x half> [[R]] +; + %r = call <3 x half> @llvm.minnum.v3f16(<3 x half> %a, <3 x half> %b) + ret <3 x half> %r +} + +define <4 x half> @call_v4f16(<4 x half> %a, <4 x half> %b) { +; MIN_LE16-LABEL: @call_v4f16( +; MIN_LE16-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 +; MIN_LE16-NEXT: [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0 +; MIN_LE16-NEXT: [[R_I0:%.*]] = call half @llvm.minnum.f16(half [[A_I0]], half [[B_I0]]) +; MIN_LE16-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1 +; MIN_LE16-NEXT: [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1 +; MIN_LE16-NEXT: [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]]) +; MIN_LE16-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2 +; MIN_LE16-NEXT: [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2 +; MIN_LE16-NEXT: [[R_I2:%.*]] = call half @llvm.minnum.f16(half [[A_I2]], half [[B_I2]]) +; MIN_LE16-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3 +; MIN_LE16-NEXT: [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3 +; MIN_LE16-NEXT: [[R_I3:%.*]] = call half @llvm.minnum.f16(half [[A_I3]], half [[B_I3]]) +; MIN_LE16-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0 +; MIN_LE16-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; MIN_LE16-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2 +; MIN_LE16-NEXT: [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3 +; MIN_LE16-NEXT: ret <4 x half> [[R]] +; +; MIN32-LABEL: @call_v4f16( +; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> +; MIN32-NEXT: [[B_I0:%.*]] = shufflevector <4 x half> [[B:%.*]], <4 x half> poison, <2 x i32> +; MIN32-NEXT: [[R_I0:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A_I0]], <2 x half> [[B_I0]]) +; MIN32-NEXT: [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> +; MIN32-NEXT: [[B_I1:%.*]] = shufflevector <4 x half> [[B]], <4 x half> poison, <2 x i32> +; MIN32-NEXT: [[R_I1:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A_I1]], <2 x half> [[B_I1]]) +; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> +; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> +; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> +; MIN32-NEXT: ret <4 x half> [[R]] +; + %r = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b) + ret <4 x half> %r +} + +declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) +declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>) +declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>) diff --git a/llvm/test/Transforms/Scalarizer/opaque-ptr-bug.ll b/llvm/test/Transforms/Scalarizer/opaque-ptr-bug.ll --- a/llvm/test/Transforms/Scalarizer/opaque-ptr-bug.ll +++ b/llvm/test/Transforms/Scalarizer/opaque-ptr-bug.ll @@ -6,19 +6,18 @@ define void @test1(ptr %p) { ; CHECK-LABEL: @test1( -; CHECK-NEXT: [[P_I12:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1 -; CHECK-NEXT: [[P_I11:%.*]] = getelementptr i32, ptr [[P]], i32 1 +; CHECK-NEXT: [[P_I11:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1 ; CHECK-NEXT: [[P_I2:%.*]] = getelementptr i32, ptr [[P]], i32 2 ; CHECK-NEXT: [[P_I3:%.*]] = getelementptr i32, ptr [[P]], i32 3 ; CHECK-NEXT: store i32 0, ptr [[P]], align 8 ; CHECK-NEXT: [[P_I1:%.*]] = getelementptr i32, ptr [[P]], i32 1 ; CHECK-NEXT: store i32 0, ptr [[P_I1]], align 4 ; CHECK-NEXT: store i32 0, ptr [[P]], align 16 -; CHECK-NEXT: store i32 0, ptr [[P_I11]], align 4 +; CHECK-NEXT: store i32 0, ptr [[P_I1]], align 4 ; CHECK-NEXT: store i32 0, ptr [[P_I2]], align 8 ; CHECK-NEXT: store i32 0, ptr [[P_I3]], align 4 ; CHECK-NEXT: store i16 0, ptr [[P]], align 4 -; CHECK-NEXT: store i16 0, ptr [[P_I12]], align 2 +; CHECK-NEXT: store i16 0, ptr [[P_I11]], align 2 ; CHECK-NEXT: ret void ; store <2 x i32> zeroinitializer, ptr %p diff --git a/llvm/test/Transforms/Scalarizer/order-bug-inseltpoison.ll b/llvm/test/Transforms/Scalarizer/order-bug-inseltpoison.ll --- a/llvm/test/Transforms/Scalarizer/order-bug-inseltpoison.ll +++ b/llvm/test/Transforms/Scalarizer/order-bug-inseltpoison.ll @@ -9,10 +9,10 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[Z:%.*]] ; CHECK: y: -; CHECK-NEXT: [[F_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[B_I0:%.*]], i32 0 -; CHECK-NEXT: [[F_UPTO1:%.*]] = insertelement <4 x i32> [[F_UPTO0]], i32 [[B_I0]], i32 1 -; CHECK-NEXT: [[F_UPTO2:%.*]] = insertelement <4 x i32> [[F_UPTO1]], i32 [[B_I0]], i32 2 -; CHECK-NEXT: [[F:%.*]] = insertelement <4 x i32> [[F_UPTO2]], i32 [[B_I0]], i32 3 +; CHECK-NEXT: [[F_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[B_I0:%.*]], i64 0 +; CHECK-NEXT: [[F_UPTO1:%.*]] = insertelement <4 x i32> [[F_UPTO0]], i32 [[B_I0]], i64 1 +; CHECK-NEXT: [[F_UPTO2:%.*]] = insertelement <4 x i32> [[F_UPTO1]], i32 [[B_I0]], i64 2 +; CHECK-NEXT: [[F:%.*]] = insertelement <4 x i32> [[F_UPTO2]], i32 [[B_I0]], i64 3 ; CHECK-NEXT: ret <4 x i32> [[F]] ; CHECK: z: ; CHECK-NEXT: [[B_I0]] = bitcast float [[A:%.*]] to i32 diff --git a/llvm/test/Transforms/Scalarizer/order-bug.ll b/llvm/test/Transforms/Scalarizer/order-bug.ll --- a/llvm/test/Transforms/Scalarizer/order-bug.ll +++ b/llvm/test/Transforms/Scalarizer/order-bug.ll @@ -9,10 +9,10 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[Z:%.*]] ; CHECK: y: -; CHECK-NEXT: [[F_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[B_I0:%.*]], i32 0 -; CHECK-NEXT: [[F_UPTO1:%.*]] = insertelement <4 x i32> [[F_UPTO0]], i32 [[B_I0]], i32 1 -; CHECK-NEXT: [[F_UPTO2:%.*]] = insertelement <4 x i32> [[F_UPTO1]], i32 [[B_I0]], i32 2 -; CHECK-NEXT: [[F:%.*]] = insertelement <4 x i32> [[F_UPTO2]], i32 [[B_I0]], i32 3 +; CHECK-NEXT: [[F_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[B_I0:%.*]], i64 0 +; CHECK-NEXT: [[F_UPTO1:%.*]] = insertelement <4 x i32> [[F_UPTO0]], i32 [[B_I0]], i64 1 +; CHECK-NEXT: [[F_UPTO2:%.*]] = insertelement <4 x i32> [[F_UPTO1]], i32 [[B_I0]], i64 2 +; CHECK-NEXT: [[F:%.*]] = insertelement <4 x i32> [[F_UPTO2]], i32 [[B_I0]], i64 3 ; CHECK-NEXT: ret <4 x i32> [[F]] ; CHECK: z: ; CHECK-NEXT: [[B_I0]] = bitcast float [[A:%.*]] to i32 diff --git a/llvm/test/Transforms/Scalarizer/phi-order.ll b/llvm/test/Transforms/Scalarizer/phi-order.ll --- a/llvm/test/Transforms/Scalarizer/phi-order.ll +++ b/llvm/test/Transforms/Scalarizer/phi-order.ll @@ -13,9 +13,9 @@ ; CHECK-NEXT: [[DOTI12:%.*]] = phi float [ 0.000000e+00, [[DOTENTRY]] ], [ [[DOTI12]], [[TMP3]] ] ; CHECK-NEXT: [[DOTI23:%.*]] = phi float [ 0.000000e+00, [[DOTENTRY]] ], [ [[DOTI23]], [[TMP3]] ] ; CHECK-NEXT: [[TMP1:%.*]] = phi float [ 1.000000e+00, [[DOTENTRY]] ], [ 2.000000e+00, [[TMP3]] ] -; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <3 x float> poison, float [[DOTI01]], i32 0 -; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <3 x float> [[DOTUPTO0]], float [[DOTI12]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[DOTUPTO1]], float [[DOTI23]], i32 2 +; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <3 x float> poison, float [[DOTI01]], i64 0 +; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <3 x float> [[DOTUPTO0]], float [[DOTI12]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[DOTUPTO1]], float [[DOTI23]], i64 2 ; CHECK-NEXT: br label [[TMP3]] ; CHECK: 3: ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[INVAL:%.*]], 0 diff --git a/llvm/test/Transforms/Scalarizer/scatter-order.ll b/llvm/test/Transforms/Scalarizer/scatter-order.ll --- a/llvm/test/Transforms/Scalarizer/scatter-order.ll +++ b/llvm/test/Transforms/Scalarizer/scatter-order.ll @@ -11,14 +11,14 @@ define <2 x i32> @test1(i1 %b, <2 x i32> %i, <2 x i32> %j) { ; CHECK-LABEL: @test1( -; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0 -; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = select i1 [[B:%.*]], i32 [[I_I0]], i32 [[J_I0]] -; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1 -; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1 +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1 +; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = select i1 [[B]], i32 [[I_I1]], i32 [[J_I1]] -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[RES]] ; %res = select i1 %b, <2 x i32> %i, <2 x i32> %j @@ -27,16 +27,16 @@ define <2 x i32> @test2(<2 x i1> %b, <2 x i32> %i, <2 x i32> %j) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[B_I0:%.*]] = extractelement <2 x i1> [[B:%.*]], i32 0 -; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0 -; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0 +; CHECK-NEXT: [[B_I0:%.*]] = extractelement <2 x i1> [[B:%.*]], i64 0 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = select i1 [[B_I0]], i32 [[I_I0]], i32 [[J_I0]] -; CHECK-NEXT: [[B_I1:%.*]] = extractelement <2 x i1> [[B]], i32 1 -; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1 -; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1 +; CHECK-NEXT: [[B_I1:%.*]] = extractelement <2 x i1> [[B]], i64 1 +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1 +; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = select i1 [[B_I1]], i32 [[I_I1]], i32 [[J_I1]] -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[RES]] ; %res = select <2 x i1> %b, <2 x i32> %i, <2 x i32> %j @@ -45,14 +45,14 @@ define <2 x i32> @test3(<2 x i32> %i, <2 x i32> %j) { ; CHECK-LABEL: @test3( -; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0 -; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0 +; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0 +; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0 ; CHECK-NEXT: [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]] -; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1 -; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1 +; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1 +; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1 ; CHECK-NEXT: [[RES_I1:%.*]] = add nuw nsw i32 [[I_I1]], [[J_I1]] -; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0 -; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 +; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i64 0 +; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[RES]] ; %res = add nuw nsw <2 x i32> %i, %j @@ -61,9 +61,9 @@ define void @test4(ptr %ptr, <2 x i32> %val) { ; CHECK-LABEL: @test4( -; CHECK-NEXT: [[VAL_I0:%.*]] = extractelement <2 x i32> [[VAL:%.*]], i32 0 +; CHECK-NEXT: [[VAL_I0:%.*]] = extractelement <2 x i32> [[VAL:%.*]], i64 0 ; CHECK-NEXT: store i32 [[VAL_I0]], ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[VAL_I1:%.*]] = extractelement <2 x i32> [[VAL]], i32 1 +; CHECK-NEXT: [[VAL_I1:%.*]] = extractelement <2 x i32> [[VAL]], i64 1 ; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr i32, ptr [[PTR]], i32 1 ; CHECK-NEXT: store i32 [[VAL_I1]], ptr [[PTR_I1]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Scalarizer/variable-extractelement.ll b/llvm/test/Transforms/Scalarizer/variable-extractelement.ll --- a/llvm/test/Transforms/Scalarizer/variable-extractelement.ll +++ b/llvm/test/Transforms/Scalarizer/variable-extractelement.ll @@ -9,16 +9,16 @@ define i32 @f1(<4 x i32> %src, i32 %index) { ; DEFAULT-LABEL: @f1( ; DEFAULT-NEXT: [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0 -; DEFAULT-NEXT: [[SRC_I0:%.*]] = extractelement <4 x i32> [[SRC:%.*]], i32 0 +; DEFAULT-NEXT: [[SRC_I0:%.*]] = extractelement <4 x i32> [[SRC:%.*]], i64 0 ; DEFAULT-NEXT: [[RES_UPTO0:%.*]] = select i1 [[INDEX_IS_0]], i32 [[SRC_I0]], i32 poison ; DEFAULT-NEXT: [[INDEX_IS_1:%.*]] = icmp eq i32 [[INDEX]], 1 -; DEFAULT-NEXT: [[SRC_I1:%.*]] = extractelement <4 x i32> [[SRC]], i32 1 +; DEFAULT-NEXT: [[SRC_I1:%.*]] = extractelement <4 x i32> [[SRC]], i64 1 ; DEFAULT-NEXT: [[RES_UPTO1:%.*]] = select i1 [[INDEX_IS_1]], i32 [[SRC_I1]], i32 [[RES_UPTO0]] ; DEFAULT-NEXT: [[INDEX_IS_2:%.*]] = icmp eq i32 [[INDEX]], 2 -; DEFAULT-NEXT: [[SRC_I2:%.*]] = extractelement <4 x i32> [[SRC]], i32 2 +; DEFAULT-NEXT: [[SRC_I2:%.*]] = extractelement <4 x i32> [[SRC]], i64 2 ; DEFAULT-NEXT: [[RES_UPTO2:%.*]] = select i1 [[INDEX_IS_2]], i32 [[SRC_I2]], i32 [[RES_UPTO1]] ; DEFAULT-NEXT: [[INDEX_IS_3:%.*]] = icmp eq i32 [[INDEX]], 3 -; DEFAULT-NEXT: [[SRC_I3:%.*]] = extractelement <4 x i32> [[SRC]], i32 3 +; DEFAULT-NEXT: [[SRC_I3:%.*]] = extractelement <4 x i32> [[SRC]], i64 3 ; DEFAULT-NEXT: [[RES_UPTO3:%.*]] = select i1 [[INDEX_IS_3]], i32 [[SRC_I3]], i32 [[RES_UPTO2]] ; DEFAULT-NEXT: ret i32 [[RES_UPTO3]] ; @@ -33,13 +33,13 @@ define i32 @f2(ptr %src, i32 %index) { ; DEFAULT-LABEL: @f2( ; DEFAULT-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16 -; DEFAULT-NEXT: [[VAL0_I0:%.*]] = extractelement <4 x i32> [[VAL0]], i32 0 +; DEFAULT-NEXT: [[VAL0_I0:%.*]] = extractelement <4 x i32> [[VAL0]], i64 0 ; DEFAULT-NEXT: [[VAL1_I0:%.*]] = shl i32 1, [[VAL0_I0]] -; DEFAULT-NEXT: [[VAL0_I1:%.*]] = extractelement <4 x i32> [[VAL0]], i32 1 +; DEFAULT-NEXT: [[VAL0_I1:%.*]] = extractelement <4 x i32> [[VAL0]], i64 1 ; DEFAULT-NEXT: [[VAL1_I1:%.*]] = shl i32 2, [[VAL0_I1]] -; DEFAULT-NEXT: [[VAL0_I2:%.*]] = extractelement <4 x i32> [[VAL0]], i32 2 +; DEFAULT-NEXT: [[VAL0_I2:%.*]] = extractelement <4 x i32> [[VAL0]], i64 2 ; DEFAULT-NEXT: [[VAL1_I2:%.*]] = shl i32 3, [[VAL0_I2]] -; DEFAULT-NEXT: [[VAL0_I3:%.*]] = extractelement <4 x i32> [[VAL0]], i32 3 +; DEFAULT-NEXT: [[VAL0_I3:%.*]] = extractelement <4 x i32> [[VAL0]], i64 3 ; DEFAULT-NEXT: [[VAL1_I3:%.*]] = shl i32 4, [[VAL0_I3]] ; DEFAULT-NEXT: [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0 ; DEFAULT-NEXT: [[VAL2_UPTO0:%.*]] = select i1 [[INDEX_IS_0]], i32 [[VAL1_I0]], i32 poison @@ -53,18 +53,18 @@ ; ; OFF-LABEL: @f2( ; OFF-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16 -; OFF-NEXT: [[VAL0_I0:%.*]] = extractelement <4 x i32> [[VAL0]], i32 0 +; OFF-NEXT: [[VAL0_I0:%.*]] = extractelement <4 x i32> [[VAL0]], i64 0 ; OFF-NEXT: [[VAL1_I0:%.*]] = shl i32 1, [[VAL0_I0]] -; OFF-NEXT: [[VAL0_I1:%.*]] = extractelement <4 x i32> [[VAL0]], i32 1 +; OFF-NEXT: [[VAL0_I1:%.*]] = extractelement <4 x i32> [[VAL0]], i64 1 ; OFF-NEXT: [[VAL1_I1:%.*]] = shl i32 2, [[VAL0_I1]] -; OFF-NEXT: [[VAL0_I2:%.*]] = extractelement <4 x i32> [[VAL0]], i32 2 +; OFF-NEXT: [[VAL0_I2:%.*]] = extractelement <4 x i32> [[VAL0]], i64 2 ; OFF-NEXT: [[VAL1_I2:%.*]] = shl i32 3, [[VAL0_I2]] -; OFF-NEXT: [[VAL0_I3:%.*]] = extractelement <4 x i32> [[VAL0]], i32 3 +; OFF-NEXT: [[VAL0_I3:%.*]] = extractelement <4 x i32> [[VAL0]], i64 3 ; OFF-NEXT: [[VAL1_I3:%.*]] = shl i32 4, [[VAL0_I3]] -; OFF-NEXT: [[VAL1_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[VAL1_I0]], i32 0 -; OFF-NEXT: [[VAL1_UPTO1:%.*]] = insertelement <4 x i32> [[VAL1_UPTO0]], i32 [[VAL1_I1]], i32 1 -; OFF-NEXT: [[VAL1_UPTO2:%.*]] = insertelement <4 x i32> [[VAL1_UPTO1]], i32 [[VAL1_I2]], i32 2 -; OFF-NEXT: [[VAL1:%.*]] = insertelement <4 x i32> [[VAL1_UPTO2]], i32 [[VAL1_I3]], i32 3 +; OFF-NEXT: [[VAL1_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[VAL1_I0]], i64 0 +; OFF-NEXT: [[VAL1_UPTO1:%.*]] = insertelement <4 x i32> [[VAL1_UPTO0]], i32 [[VAL1_I1]], i64 1 +; OFF-NEXT: [[VAL1_UPTO2:%.*]] = insertelement <4 x i32> [[VAL1_UPTO1]], i32 [[VAL1_I2]], i64 2 +; OFF-NEXT: [[VAL1:%.*]] = insertelement <4 x i32> [[VAL1_UPTO2]], i32 [[VAL1_I3]], i64 3 ; OFF-NEXT: [[VAL2:%.*]] = extractelement <4 x i32> [[VAL1]], i32 [[INDEX:%.*]] ; OFF-NEXT: ret i32 [[VAL2]] ; diff --git a/llvm/test/Transforms/Scalarizer/variable-insertelement.ll b/llvm/test/Transforms/Scalarizer/variable-insertelement.ll --- a/llvm/test/Transforms/Scalarizer/variable-insertelement.ll +++ b/llvm/test/Transforms/Scalarizer/variable-insertelement.ll @@ -9,21 +9,21 @@ define <4 x i32> @f1(<4 x i32> %src, i32 %val, i32 %index) { ; DEFAULT-LABEL: @f1( ; DEFAULT-NEXT: [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0 -; DEFAULT-NEXT: [[SRC_I0:%.*]] = extractelement <4 x i32> [[SRC:%.*]], i32 0 +; DEFAULT-NEXT: [[SRC_I0:%.*]] = extractelement <4 x i32> [[SRC:%.*]], i64 0 ; DEFAULT-NEXT: [[RES_I0:%.*]] = select i1 [[INDEX_IS_0]], i32 [[VAL:%.*]], i32 [[SRC_I0]] ; DEFAULT-NEXT: [[INDEX_IS_1:%.*]] = icmp eq i32 [[INDEX]], 1 -; DEFAULT-NEXT: [[SRC_I1:%.*]] = extractelement <4 x i32> [[SRC]], i32 1 +; DEFAULT-NEXT: [[SRC_I1:%.*]] = extractelement <4 x i32> [[SRC]], i64 1 ; DEFAULT-NEXT: [[RES_I1:%.*]] = select i1 [[INDEX_IS_1]], i32 [[VAL]], i32 [[SRC_I1]] ; DEFAULT-NEXT: [[INDEX_IS_2:%.*]] = icmp eq i32 [[INDEX]], 2 -; DEFAULT-NEXT: [[SRC_I2:%.*]] = extractelement <4 x i32> [[SRC]], i32 2 +; DEFAULT-NEXT: [[SRC_I2:%.*]] = extractelement <4 x i32> [[SRC]], i64 2 ; DEFAULT-NEXT: [[RES_I2:%.*]] = select i1 [[INDEX_IS_2]], i32 [[VAL]], i32 [[SRC_I2]] ; DEFAULT-NEXT: [[INDEX_IS_3:%.*]] = icmp eq i32 [[INDEX]], 3 -; DEFAULT-NEXT: [[SRC_I3:%.*]] = extractelement <4 x i32> [[SRC]], i32 3 +; DEFAULT-NEXT: [[SRC_I3:%.*]] = extractelement <4 x i32> [[SRC]], i64 3 ; DEFAULT-NEXT: [[RES_I3:%.*]] = select i1 [[INDEX_IS_3]], i32 [[VAL]], i32 [[SRC_I3]] -; DEFAULT-NEXT: [[RES_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[RES_I0]], i32 0 -; DEFAULT-NEXT: [[RES_UPTO1:%.*]] = insertelement <4 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 -; DEFAULT-NEXT: [[RES_UPTO2:%.*]] = insertelement <4 x i32> [[RES_UPTO1]], i32 [[RES_I2]], i32 2 -; DEFAULT-NEXT: [[RES:%.*]] = insertelement <4 x i32> [[RES_UPTO2]], i32 [[RES_I3]], i32 3 +; DEFAULT-NEXT: [[RES_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[RES_I0]], i64 0 +; DEFAULT-NEXT: [[RES_UPTO1:%.*]] = insertelement <4 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i64 1 +; DEFAULT-NEXT: [[RES_UPTO2:%.*]] = insertelement <4 x i32> [[RES_UPTO1]], i32 [[RES_I2]], i64 2 +; DEFAULT-NEXT: [[RES:%.*]] = insertelement <4 x i32> [[RES_UPTO2]], i32 [[RES_I3]], i64 3 ; DEFAULT-NEXT: ret <4 x i32> [[RES]] ; ; OFF-LABEL: @f1( @@ -38,43 +38,43 @@ ; DEFAULT-LABEL: @f2( ; DEFAULT-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16 ; DEFAULT-NEXT: [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0 -; DEFAULT-NEXT: [[VAL0_I0:%.*]] = extractelement <4 x i32> [[VAL0]], i32 0 +; DEFAULT-NEXT: [[VAL0_I0:%.*]] = extractelement <4 x i32> [[VAL0]], i64 0 ; DEFAULT-NEXT: [[VAL1_I0:%.*]] = select i1 [[INDEX_IS_0]], i32 1, i32 [[VAL0_I0]] ; DEFAULT-NEXT: [[INDEX_IS_1:%.*]] = icmp eq i32 [[INDEX]], 1 -; DEFAULT-NEXT: [[VAL0_I1:%.*]] = extractelement <4 x i32> [[VAL0]], i32 1 +; DEFAULT-NEXT: [[VAL0_I1:%.*]] = extractelement <4 x i32> [[VAL0]], i64 1 ; DEFAULT-NEXT: [[VAL1_I1:%.*]] = select i1 [[INDEX_IS_1]], i32 1, i32 [[VAL0_I1]] ; DEFAULT-NEXT: [[INDEX_IS_2:%.*]] = icmp eq i32 [[INDEX]], 2 -; DEFAULT-NEXT: [[VAL0_I2:%.*]] = extractelement <4 x i32> [[VAL0]], i32 2 +; DEFAULT-NEXT: [[VAL0_I2:%.*]] = extractelement <4 x i32> [[VAL0]], i64 2 ; DEFAULT-NEXT: [[VAL1_I2:%.*]] = select i1 [[INDEX_IS_2]], i32 1, i32 [[VAL0_I2]] ; DEFAULT-NEXT: [[INDEX_IS_3:%.*]] = icmp eq i32 [[INDEX]], 3 -; DEFAULT-NEXT: [[VAL0_I3:%.*]] = extractelement <4 x i32> [[VAL0]], i32 3 +; DEFAULT-NEXT: [[VAL0_I3:%.*]] = extractelement <4 x i32> [[VAL0]], i64 3 ; DEFAULT-NEXT: [[VAL1_I3:%.*]] = select i1 [[INDEX_IS_3]], i32 1, i32 [[VAL0_I3]] ; DEFAULT-NEXT: [[VAL2_I0:%.*]] = shl i32 1, [[VAL1_I0]] ; DEFAULT-NEXT: [[VAL2_I1:%.*]] = shl i32 2, [[VAL1_I1]] ; DEFAULT-NEXT: [[VAL2_I2:%.*]] = shl i32 3, [[VAL1_I2]] ; DEFAULT-NEXT: [[VAL2_I3:%.*]] = shl i32 4, [[VAL1_I3]] -; DEFAULT-NEXT: [[VAL2_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[VAL2_I0]], i32 0 -; DEFAULT-NEXT: [[VAL2_UPTO1:%.*]] = insertelement <4 x i32> [[VAL2_UPTO0]], i32 [[VAL2_I1]], i32 1 -; DEFAULT-NEXT: [[VAL2_UPTO2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO1]], i32 [[VAL2_I2]], i32 2 -; DEFAULT-NEXT: [[VAL2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO2]], i32 [[VAL2_I3]], i32 3 +; DEFAULT-NEXT: [[VAL2_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[VAL2_I0]], i64 0 +; DEFAULT-NEXT: [[VAL2_UPTO1:%.*]] = insertelement <4 x i32> [[VAL2_UPTO0]], i32 [[VAL2_I1]], i64 1 +; DEFAULT-NEXT: [[VAL2_UPTO2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO1]], i32 [[VAL2_I2]], i64 2 +; DEFAULT-NEXT: [[VAL2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO2]], i32 [[VAL2_I3]], i64 3 ; DEFAULT-NEXT: store <4 x i32> [[VAL2]], ptr [[DEST:%.*]], align 16 ; DEFAULT-NEXT: ret void ; ; OFF-LABEL: @f2( ; OFF-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16 ; OFF-NEXT: [[VAL1:%.*]] = insertelement <4 x i32> [[VAL0]], i32 1, i32 [[INDEX:%.*]] -; OFF-NEXT: [[VAL1_I0:%.*]] = extractelement <4 x i32> [[VAL1]], i32 0 +; OFF-NEXT: [[VAL1_I0:%.*]] = extractelement <4 x i32> [[VAL1]], i64 0 ; OFF-NEXT: [[VAL2_I0:%.*]] = shl i32 1, [[VAL1_I0]] -; OFF-NEXT: [[VAL1_I1:%.*]] = extractelement <4 x i32> [[VAL1]], i32 1 +; OFF-NEXT: [[VAL1_I1:%.*]] = extractelement <4 x i32> [[VAL1]], i64 1 ; OFF-NEXT: [[VAL2_I1:%.*]] = shl i32 2, [[VAL1_I1]] -; OFF-NEXT: [[VAL1_I2:%.*]] = extractelement <4 x i32> [[VAL1]], i32 2 +; OFF-NEXT: [[VAL1_I2:%.*]] = extractelement <4 x i32> [[VAL1]], i64 2 ; OFF-NEXT: [[VAL2_I2:%.*]] = shl i32 3, [[VAL1_I2]] -; OFF-NEXT: [[VAL1_I3:%.*]] = extractelement <4 x i32> [[VAL1]], i32 3 +; OFF-NEXT: [[VAL1_I3:%.*]] = extractelement <4 x i32> [[VAL1]], i64 3 ; OFF-NEXT: [[VAL2_I3:%.*]] = shl i32 4, [[VAL1_I3]] -; OFF-NEXT: [[VAL2_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[VAL2_I0]], i32 0 -; OFF-NEXT: [[VAL2_UPTO1:%.*]] = insertelement <4 x i32> [[VAL2_UPTO0]], i32 [[VAL2_I1]], i32 1 -; OFF-NEXT: [[VAL2_UPTO2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO1]], i32 [[VAL2_I2]], i32 2 -; OFF-NEXT: [[VAL2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO2]], i32 [[VAL2_I3]], i32 3 +; OFF-NEXT: [[VAL2_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[VAL2_I0]], i64 0 +; OFF-NEXT: [[VAL2_UPTO1:%.*]] = insertelement <4 x i32> [[VAL2_UPTO0]], i32 [[VAL2_I1]], i64 1 +; OFF-NEXT: [[VAL2_UPTO2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO1]], i32 [[VAL2_I2]], i64 2 +; OFF-NEXT: [[VAL2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO2]], i32 [[VAL2_I3]], i64 3 ; OFF-NEXT: store <4 x i32> [[VAL2]], ptr [[DEST:%.*]], align 16 ; OFF-NEXT: ret void ; diff --git a/llvm/test/Transforms/Scalarizer/vector-gep.ll b/llvm/test/Transforms/Scalarizer/vector-gep.ll --- a/llvm/test/Transforms/Scalarizer/vector-gep.ll +++ b/llvm/test/Transforms/Scalarizer/vector-gep.ll @@ -13,13 +13,13 @@ ; CHECK-LABEL: @test1( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x ptr>, ptr @vec, align 32 -; CHECK-NEXT: [[DOTI0:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 0 +; CHECK-NEXT: [[DOTI0:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTI01:%.*]] = getelementptr i16, ptr [[DOTI0]], i16 1 -; CHECK-NEXT: [[DOTI1:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 1 +; CHECK-NEXT: [[DOTI1:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 1 ; CHECK-NEXT: [[DOTI12:%.*]] = getelementptr i16, ptr [[DOTI1]], i16 1 -; CHECK-NEXT: [[DOTI2:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 2 +; CHECK-NEXT: [[DOTI2:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 2 ; CHECK-NEXT: [[DOTI23:%.*]] = getelementptr i16, ptr [[DOTI2]], i16 1 -; CHECK-NEXT: [[DOTI3:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 3 +; CHECK-NEXT: [[DOTI3:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 3 ; CHECK-NEXT: [[DOTI34:%.*]] = getelementptr i16, ptr [[DOTI3]], i16 1 ; CHECK-NEXT: ret void ; @@ -35,21 +35,15 @@ ; CHECK-LABEL: @test2( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x ptr>, ptr @vec, align 32 -; CHECK-NEXT: [[DOTI0:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 0 -; CHECK-NEXT: [[DOTI1:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 1 -; CHECK-NEXT: [[DOTI2:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 2 -; CHECK-NEXT: [[DOTI3:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 3 +; CHECK-NEXT: [[DOTI0:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 0 +; CHECK-NEXT: [[DOTI1:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 1 +; CHECK-NEXT: [[DOTI2:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 2 +; CHECK-NEXT: [[DOTI3:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 3 ; CHECK-NEXT: [[INDEX:%.*]] = load i16, ptr @index, align 2 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[INDEX]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i16> [[DOTSPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLAT_I0:%.*]] = extractelement <4 x i16> [[DOTSPLAT]], i32 0 -; CHECK-NEXT: [[DOTI01:%.*]] = getelementptr i16, ptr [[DOTI0]], i16 [[DOTSPLAT_I0]] -; CHECK-NEXT: [[DOTSPLAT_I1:%.*]] = extractelement <4 x i16> [[DOTSPLAT]], i32 1 -; CHECK-NEXT: [[DOTI12:%.*]] = getelementptr i16, ptr [[DOTI1]], i16 [[DOTSPLAT_I1]] -; CHECK-NEXT: [[DOTSPLAT_I2:%.*]] = extractelement <4 x i16> [[DOTSPLAT]], i32 2 -; CHECK-NEXT: [[DOTI23:%.*]] = getelementptr i16, ptr [[DOTI2]], i16 [[DOTSPLAT_I2]] -; CHECK-NEXT: [[DOTSPLAT_I3:%.*]] = extractelement <4 x i16> [[DOTSPLAT]], i32 3 -; CHECK-NEXT: [[DOTI34:%.*]] = getelementptr i16, ptr [[DOTI3]], i16 [[DOTSPLAT_I3]] +; CHECK-NEXT: [[DOTI01:%.*]] = getelementptr i16, ptr [[DOTI0]], i16 [[INDEX]] +; CHECK-NEXT: [[DOTI12:%.*]] = getelementptr i16, ptr [[DOTI1]], i16 [[INDEX]] +; CHECK-NEXT: [[DOTI23:%.*]] = getelementptr i16, ptr [[DOTI2]], i16 [[INDEX]] +; CHECK-NEXT: [[DOTI34:%.*]] = getelementptr i16, ptr [[DOTI3]], i16 [[INDEX]] ; CHECK-NEXT: ret void ; bb: @@ -77,20 +71,14 @@ ; CHECK-LABEL: @test3_constbase( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[OFFSET:%.*]] = getelementptr [4 x i16], ptr @ptr, i16 0, i16 [[IDX:%.*]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[OFFSET]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLAT_I0:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i32 0 -; CHECK-NEXT: [[GEP_I0:%.*]] = getelementptr i16, ptr [[DOTSPLAT_I0]], i16 0 -; CHECK-NEXT: [[DOTSPLAT_I1:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i32 1 -; CHECK-NEXT: [[GEP_I1:%.*]] = getelementptr i16, ptr [[DOTSPLAT_I1]], i16 1 -; CHECK-NEXT: [[DOTSPLAT_I2:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i32 2 -; CHECK-NEXT: [[GEP_I2:%.*]] = getelementptr i16, ptr [[DOTSPLAT_I2]], i16 2 -; CHECK-NEXT: [[DOTSPLAT_I3:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i32 3 -; CHECK-NEXT: [[GEP_I3:%.*]] = getelementptr i16, ptr [[DOTSPLAT_I3]], i16 3 -; CHECK-NEXT: [[GEP_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[GEP_I0]], i32 0 -; CHECK-NEXT: [[GEP_UPTO1:%.*]] = insertelement <4 x ptr> [[GEP_UPTO0]], ptr [[GEP_I1]], i32 1 -; CHECK-NEXT: [[GEP_UPTO2:%.*]] = insertelement <4 x ptr> [[GEP_UPTO1]], ptr [[GEP_I2]], i32 2 -; CHECK-NEXT: [[GEP:%.*]] = insertelement <4 x ptr> [[GEP_UPTO2]], ptr [[GEP_I3]], i32 3 +; CHECK-NEXT: [[GEP_I0:%.*]] = getelementptr i16, ptr [[OFFSET]], i16 0 +; CHECK-NEXT: [[GEP_I1:%.*]] = getelementptr i16, ptr [[OFFSET]], i16 1 +; CHECK-NEXT: [[GEP_I2:%.*]] = getelementptr i16, ptr [[OFFSET]], i16 2 +; CHECK-NEXT: [[GEP_I3:%.*]] = getelementptr i16, ptr [[OFFSET]], i16 3 +; CHECK-NEXT: [[GEP_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[GEP_I0]], i64 0 +; CHECK-NEXT: [[GEP_UPTO1:%.*]] = insertelement <4 x ptr> [[GEP_UPTO0]], ptr [[GEP_I1]], i64 1 +; CHECK-NEXT: [[GEP_UPTO2:%.*]] = insertelement <4 x ptr> [[GEP_UPTO1]], ptr [[GEP_I2]], i64 2 +; CHECK-NEXT: [[GEP:%.*]] = insertelement <4 x ptr> [[GEP_UPTO2]], ptr [[GEP_I3]], i64 3 ; CHECK-NEXT: ret <4 x ptr> [[GEP]] ; bb: @@ -103,18 +91,18 @@ define <4 x ptr> @test3_varoffset(<4 x i16> %offset) { ; CHECK-LABEL: @test3_varoffset( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[OFFSET_I0:%.*]] = extractelement <4 x i16> [[OFFSET:%.*]], i32 0 +; CHECK-NEXT: [[OFFSET_I0:%.*]] = extractelement <4 x i16> [[OFFSET:%.*]], i64 0 ; CHECK-NEXT: [[GEP_I0:%.*]] = getelementptr i16, ptr @ptr, i16 [[OFFSET_I0]] -; CHECK-NEXT: [[OFFSET_I1:%.*]] = extractelement <4 x i16> [[OFFSET]], i32 1 +; CHECK-NEXT: [[OFFSET_I1:%.*]] = extractelement <4 x i16> [[OFFSET]], i64 1 ; CHECK-NEXT: [[GEP_I1:%.*]] = getelementptr i16, ptr @ptr, i16 [[OFFSET_I1]] -; CHECK-NEXT: [[OFFSET_I2:%.*]] = extractelement <4 x i16> [[OFFSET]], i32 2 +; CHECK-NEXT: [[OFFSET_I2:%.*]] = extractelement <4 x i16> [[OFFSET]], i64 2 ; CHECK-NEXT: [[GEP_I2:%.*]] = getelementptr i16, ptr @ptr, i16 [[OFFSET_I2]] -; CHECK-NEXT: [[OFFSET_I3:%.*]] = extractelement <4 x i16> [[OFFSET]], i32 3 +; CHECK-NEXT: [[OFFSET_I3:%.*]] = extractelement <4 x i16> [[OFFSET]], i64 3 ; CHECK-NEXT: [[GEP_I3:%.*]] = getelementptr i16, ptr @ptr, i16 [[OFFSET_I3]] -; CHECK-NEXT: [[GEP_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[GEP_I0]], i32 0 -; CHECK-NEXT: [[GEP_UPTO1:%.*]] = insertelement <4 x ptr> [[GEP_UPTO0]], ptr [[GEP_I1]], i32 1 -; CHECK-NEXT: [[GEP_UPTO2:%.*]] = insertelement <4 x ptr> [[GEP_UPTO1]], ptr [[GEP_I2]], i32 2 -; CHECK-NEXT: [[GEP:%.*]] = insertelement <4 x ptr> [[GEP_UPTO2]], ptr [[GEP_I3]], i32 3 +; CHECK-NEXT: [[GEP_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[GEP_I0]], i64 0 +; CHECK-NEXT: [[GEP_UPTO1:%.*]] = insertelement <4 x ptr> [[GEP_UPTO0]], ptr [[GEP_I1]], i64 1 +; CHECK-NEXT: [[GEP_UPTO2:%.*]] = insertelement <4 x ptr> [[GEP_UPTO1]], ptr [[GEP_I2]], i64 2 +; CHECK-NEXT: [[GEP:%.*]] = insertelement <4 x ptr> [[GEP_UPTO2]], ptr [[GEP_I3]], i64 3 ; CHECK-NEXT: ret <4 x ptr> [[GEP]] ; bb: @@ -127,16 +115,10 @@ ; CHECK-LABEL: @test4( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @ptrptr, align 8 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLAT_I0:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i32 0 -; CHECK-NEXT: [[DOTI0:%.*]] = getelementptr i16, ptr [[DOTSPLAT_I0]], i16 0 -; CHECK-NEXT: [[DOTSPLAT_I1:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i32 1 -; CHECK-NEXT: [[DOTI1:%.*]] = getelementptr i16, ptr [[DOTSPLAT_I1]], i16 1 -; CHECK-NEXT: [[DOTSPLAT_I2:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i32 2 -; CHECK-NEXT: [[DOTI2:%.*]] = getelementptr i16, ptr [[DOTSPLAT_I2]], i16 2 -; CHECK-NEXT: [[DOTSPLAT_I3:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i32 3 -; CHECK-NEXT: [[DOTI3:%.*]] = getelementptr i16, ptr [[DOTSPLAT_I3]], i16 3 +; CHECK-NEXT: [[DOTI0:%.*]] = getelementptr i16, ptr [[TMP0]], i16 0 +; CHECK-NEXT: [[DOTI1:%.*]] = getelementptr i16, ptr [[TMP0]], i16 1 +; CHECK-NEXT: [[DOTI2:%.*]] = getelementptr i16, ptr [[TMP0]], i16 2 +; CHECK-NEXT: [[DOTI3:%.*]] = getelementptr i16, ptr [[TMP0]], i16 3 ; CHECK-NEXT: ret void ; bb: @@ -151,13 +133,13 @@ ; CHECK-LABEL: @test5( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x ptr>, ptr @vec, align 32 -; CHECK-NEXT: [[DOTI0:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 0 +; CHECK-NEXT: [[DOTI0:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTI01:%.*]] = getelementptr inbounds i16, ptr [[DOTI0]], i16 1 -; CHECK-NEXT: [[DOTI1:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 1 +; CHECK-NEXT: [[DOTI1:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 1 ; CHECK-NEXT: [[DOTI12:%.*]] = getelementptr inbounds i16, ptr [[DOTI1]], i16 1 -; CHECK-NEXT: [[DOTI2:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 2 +; CHECK-NEXT: [[DOTI2:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 2 ; CHECK-NEXT: [[DOTI23:%.*]] = getelementptr inbounds i16, ptr [[DOTI2]], i16 1 -; CHECK-NEXT: [[DOTI3:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 3 +; CHECK-NEXT: [[DOTI3:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 3 ; CHECK-NEXT: [[DOTI34:%.*]] = getelementptr inbounds i16, ptr [[DOTI3]], i16 1 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/Scalarizer/vector-of-pointer-to-vector.ll b/llvm/test/Transforms/Scalarizer/vector-of-pointer-to-vector.ll --- a/llvm/test/Transforms/Scalarizer/vector-of-pointer-to-vector.ll +++ b/llvm/test/Transforms/Scalarizer/vector-of-pointer-to-vector.ll @@ -5,10 +5,10 @@ define <1 x i32> @f1(<1 x ptr> %src, i32 %index) { ; CHECK-LABEL: @f1( ; CHECK-NEXT: [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0 -; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <1 x ptr> [[SRC:%.*]], i32 0 +; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <1 x ptr> [[SRC:%.*]], i64 0 ; CHECK-NEXT: [[DOTUPTO0:%.*]] = select i1 [[INDEX_IS_0]], ptr [[SRC_I0]], ptr poison ; CHECK-NEXT: [[DOTI0:%.*]] = load i32, ptr [[DOTUPTO0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <1 x i32> poison, i32 [[DOTI0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <1 x i32> poison, i32 [[DOTI0]], i64 0 ; CHECK-NEXT: ret <1 x i32> [[TMP1]] ; %1 = extractelement <1 x ptr> %src, i32 %index @@ -18,9 +18,9 @@ define <1 x i32> @f1b(<1 x ptr> %src) { ; CHECK-LABEL: @f1b( -; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <1 x ptr> [[SRC:%.*]], i32 0 +; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <1 x ptr> [[SRC:%.*]], i64 0 ; CHECK-NEXT: [[DOTI0:%.*]] = load i32, ptr [[SRC_I0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <1 x i32> poison, i32 [[DOTI0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <1 x i32> poison, i32 [[DOTI0]], i64 0 ; CHECK-NEXT: ret <1 x i32> [[TMP1]] ; %1 = extractelement <1 x ptr> %src, i32 0 @@ -31,13 +31,13 @@ define <2 x i32> @f2(<1 x ptr> %src, i32 %index) { ; CHECK-LABEL: @f2( ; CHECK-NEXT: [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0 -; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <1 x ptr> [[SRC:%.*]], i32 0 +; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <1 x ptr> [[SRC:%.*]], i64 0 ; CHECK-NEXT: [[DOTUPTO0:%.*]] = select i1 [[INDEX_IS_0]], ptr [[SRC_I0]], ptr poison ; CHECK-NEXT: [[DOTUPTO0_I1:%.*]] = getelementptr i32, ptr [[DOTUPTO0]], i32 1 ; CHECK-NEXT: [[DOTI0:%.*]] = load i32, ptr [[DOTUPTO0]], align 4 ; CHECK-NEXT: [[DOTI1:%.*]] = load i32, ptr [[DOTUPTO0_I1]], align 4 -; CHECK-NEXT: [[DOTUPTO01:%.*]] = insertelement <2 x i32> poison, i32 [[DOTI0]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[DOTUPTO01]], i32 [[DOTI1]], i32 1 +; CHECK-NEXT: [[DOTUPTO01:%.*]] = insertelement <2 x i32> poison, i32 [[DOTI0]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[DOTUPTO01]], i32 [[DOTI1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %1 = extractelement <1 x ptr> %src, i32 %index @@ -47,12 +47,12 @@ define <2 x i32> @f2b(<1 x ptr> %src) { ; CHECK-LABEL: @f2b( -; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <1 x ptr> [[SRC:%.*]], i32 0 +; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <1 x ptr> [[SRC:%.*]], i64 0 ; CHECK-NEXT: [[SRC_I0_I1:%.*]] = getelementptr i32, ptr [[SRC_I0]], i32 1 ; CHECK-NEXT: [[DOTI0:%.*]] = load i32, ptr [[SRC_I0]], align 4 ; CHECK-NEXT: [[DOTI1:%.*]] = load i32, ptr [[SRC_I0_I1]], align 4 -; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[DOTI0]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[DOTUPTO0]], i32 [[DOTI1]], i32 1 +; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[DOTI0]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[DOTUPTO0]], i32 [[DOTI1]], i64 1 ; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %1 = extractelement <1 x ptr> %src, i32 0 @@ -62,10 +62,10 @@ define void @f3(<1 x ptr> %src, i32 %index, <2 x i32> %val) { ; CHECK-LABEL: @f3( -; CHECK-NEXT: [[VAL_I0:%.*]] = extractelement <2 x i32> [[VAL:%.*]], i32 0 -; CHECK-NEXT: [[VAL_I1:%.*]] = extractelement <2 x i32> [[VAL]], i32 1 +; CHECK-NEXT: [[VAL_I0:%.*]] = extractelement <2 x i32> [[VAL:%.*]], i64 0 +; CHECK-NEXT: [[VAL_I1:%.*]] = extractelement <2 x i32> [[VAL]], i64 1 ; CHECK-NEXT: [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0 -; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <1 x ptr> [[SRC:%.*]], i32 0 +; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <1 x ptr> [[SRC:%.*]], i64 0 ; CHECK-NEXT: [[DOTUPTO0:%.*]] = select i1 [[INDEX_IS_0]], ptr [[SRC_I0]], ptr poison ; CHECK-NEXT: [[DOTUPTO0_I1:%.*]] = getelementptr i32, ptr [[DOTUPTO0]], i32 1 ; CHECK-NEXT: store i32 [[VAL_I0]], ptr [[DOTUPTO0]], align 4 @@ -79,9 +79,9 @@ define void @f3b(<1 x ptr> %src, <2 x i32> %val) { ; CHECK-LABEL: @f3b( -; CHECK-NEXT: [[VAL_I0:%.*]] = extractelement <2 x i32> [[VAL:%.*]], i32 0 -; CHECK-NEXT: [[VAL_I1:%.*]] = extractelement <2 x i32> [[VAL]], i32 1 -; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <1 x ptr> [[SRC:%.*]], i32 0 +; CHECK-NEXT: [[VAL_I0:%.*]] = extractelement <2 x i32> [[VAL:%.*]], i64 0 +; CHECK-NEXT: [[VAL_I1:%.*]] = extractelement <2 x i32> [[VAL]], i64 1 +; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <1 x ptr> [[SRC:%.*]], i64 0 ; CHECK-NEXT: [[SRC_I0_I1:%.*]] = getelementptr i32, ptr [[SRC_I0]], i32 1 ; CHECK-NEXT: store i32 [[VAL_I0]], ptr [[SRC_I0]], align 4 ; CHECK-NEXT: store i32 [[VAL_I1]], ptr [[SRC_I0_I1]], align 4