Index: lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- lib/CodeGen/AtomicExpandPass.cpp
+++ lib/CodeGen/AtomicExpandPass.cpp
@@ -268,8 +268,7 @@
   IRBuilder<> Builder(LI);
   AtomicOrdering Order = LI->getOrdering();
   Value *Addr = LI->getPointerOperand();
-  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
-  Constant *DummyVal = Constant::getNullValue(Ty);
+  Constant *DummyVal = Constant::getNullValue(LI->getType());
 
   Value *Pair = Builder.CreateAtomicCmpXchg(
       Addr, DummyVal, DummyVal, Order,
Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -3432,7 +3432,7 @@
 /// Add the ultimately found memory instructions to MemoryUses.
 static bool FindAllMemoryUses(
     Instruction *I,
-    SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
+    SmallVectorImpl<Instruction *> &MemoryUses,
     SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) {
   // If we already considered this instruction, we're done.
   if (!ConsideredInsts.insert(I).second)
@@ -3447,14 +3447,14 @@
     Instruction *UserI = cast<Instruction>(U.getUser());
 
     if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
-      MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
+      MemoryUses.push_back(LI);
      continue;
     }
 
     if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
       unsigned opNo = U.getOperandNo();
       if (opNo == 0) return true; // Storing addr, not into addr.
-      MemoryUses.push_back(std::make_pair(SI, opNo));
+      MemoryUses.push_back(SI);
       continue;
     }
 
@@ -3554,7 +3554,7 @@
   // check to see if their addressing modes will include this instruction.  If
   // so, we can fold it into all uses, so it doesn't matter if it has multiple
   // uses.
-  SmallVector<std::pair<Instruction *, unsigned>, 16> MemoryUses;
+  SmallVector<Instruction *, 16> MemoryUses;
   SmallPtrSet<Instruction *, 16> ConsideredInsts;
   if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
     return false;  // Has a non-memory, non-foldable use!
@@ -3565,17 +3565,21 @@
   // *actually* fold the instruction.
   SmallVector<Instruction *, 16> MatchedAddrModeInsts;
   for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
-    Instruction *User = MemoryUses[i].first;
-    unsigned OpNo = MemoryUses[i].second;
-
-    // Get the access type of this use.  If the use isn't a pointer, we don't
-    // know what it accesses.
-    Value *Address = User->getOperand(OpNo);
-    PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
-    if (!AddrTy)
-      return false;
-    Type *AddressAccessTy = AddrTy->getElementType();
-    unsigned AS = AddrTy->getAddressSpace();
+    Value *Address;
+    Type *AddressAccessTy;
+    unsigned AS;
+
+    Instruction *User = MemoryUses[i];
+    if (auto *LI = dyn_cast<LoadInst>(User)) {
+      Address = LI->getPointerOperand();
+      AddressAccessTy = LI->getType();
+      AS = LI->getPointerAddressSpace();
+    } else {
+      auto *SI = cast<StoreInst>(User);
+      Address = SI->getPointerOperand();
+      AddressAccessTy = SI->getValueOperand()->getType();
+      AS = SI->getPointerAddressSpace();
+    }
 
     // Do a match against the root of this address, ignoring profitability. This
    // will tell us if the addressing mode for the memory operation will
Index: lib/Target/AMDGPU/SITypeRewriter.cpp
===================================================================
--- lib/Target/AMDGPU/SITypeRewriter.cpp
+++ lib/Target/AMDGPU/SITypeRewriter.cpp
@@ -74,9 +74,8 @@
 void SITypeRewriter::visitLoadInst(LoadInst &I) {
   Value *Ptr = I.getPointerOperand();
   Type *PtrTy = Ptr->getType();
-  Type *ElemTy = PtrTy->getPointerElementType();
   IRBuilder<> Builder(&I);
-  if (ElemTy == v16i8) {
+  if (I.getType() == v16i8) {
     Value *BitCast = Builder.CreateBitCast(Ptr,
         PointerType::get(v4i32, PtrTy->getPointerAddressSpace()));
     LoadInst *Load = Builder.CreateLoad(BitCast);
Index: lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
===================================================================
--- lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -183,14 +183,17 @@
     for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
          J != JE; ++J) {
       Value *PtrValue;
+      Type *ValTy = nullptr;
       Instruction *MemI;
 
       if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
         MemI = LMemI;
         PtrValue = LMemI->getPointerOperand();
+        ValTy = LMemI->getType();
       } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
         MemI = SMemI;
         PtrValue = SMemI->getPointerOperand();
+        ValTy = SMemI->getValueOperand()->getType();
       } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(J)) {
         if (IMemI->getIntrinsicID() == Intrinsic::prefetch) {
           MemI = IMemI;
@@ -203,8 +206,7 @@
         continue;
 
       // There are no update forms for Altivec vector load/stores.
-      if (ST && ST->hasAltivec() &&
-          PtrValue->getType()->getPointerElementType()->isVectorTy())
+      if (ST && ST->hasAltivec() && ValTy && ValTy->isVectorTy())
         continue;
 
       if (L->isLoopInvariant(PtrValue))
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -19543,9 +19543,9 @@
 // FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
-  auto PTy = cast<PointerType>(LI->getPointerOperand()->getType());
-  return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg
-                                               : AtomicExpansionKind::None;
+  return needsCmpXchgNb(LI->getType())
+             ? AtomicExpansionKind::CmpXChg
+             : AtomicExpansionKind::None;
 }
 
 TargetLowering::AtomicExpansionKind
Index: lib/Transforms/IPO/GlobalOpt.cpp
===================================================================
--- lib/Transforms/IPO/GlobalOpt.cpp
+++ lib/Transforms/IPO/GlobalOpt.cpp
@@ -2188,10 +2188,10 @@
 /// another pointer type, we punt.  We basically just support direct accesses to
 /// globals and GEP's of globals.  This should be kept up to date with
 /// CommitValueTo.
-static bool isSimpleEnoughPointerToCommit(Constant *C) {
+static bool isSimpleEnoughPointerToCommit(Constant *C, Type *ValTy) {
   // Conservatively, avoid aggregate types. This is because we don't
   // want to worry about them partially overlapping other stores.
-  if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
+  if (!ValTy->isSingleValueType())
     return false;
 
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
@@ -2428,19 +2428,20 @@
        DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
        return false;  // no volatile/atomic accesses.
      }
-      Constant *Ptr = getVal(SI->getOperand(1));
+      Constant *Ptr = getVal(SI->getPointerOperand());
       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
         DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
         Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
         DEBUG(dbgs() << "; To: " << *Ptr << "\n");
       }
-      if (!isSimpleEnoughPointerToCommit(Ptr)) {
+      Type *ValTy = SI->getValueOperand()->getType();
+      if (!isSimpleEnoughPointerToCommit(Ptr, ValTy)) {
         // If this is too complex for us to commit, reject it.
         DEBUG(dbgs() << "Pointer is too complex for us to evaluate store.");
         return false;
       }
 
-      Constant *Val = getVal(SI->getOperand(0));
+      Constant *Val = getVal(SI->getValueOperand());
 
       // If this might be too difficult for the backend to handle (e.g. the addr
       // of one global variable divided by another) then we can't commit it.
Index: lib/Transforms/Instrumentation/ThreadSanitizer.cpp
===================================================================
--- lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -94,7 +94,7 @@
                                       SmallVectorImpl<Instruction *> &All,
                                       const DataLayout &DL);
   bool addrPointsToConstantData(Value *Addr);
-  int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
+  int getMemoryAccessFuncIndex(Type *OrigTy, const DataLayout &DL);
 
   Type *IntptrTy;
   IntegerType *OrdTy;
@@ -410,7 +410,10 @@
   Value *Addr = IsWrite
       ? cast<StoreInst>(I)->getPointerOperand()
       : cast<LoadInst>(I)->getPointerOperand();
-  int Idx = getMemoryAccessFuncIndex(Addr, DL);
+  Type *OrigTy = IsWrite
+      ? cast<StoreInst>(I)->getValueOperand()->getType()
+      : cast<LoadInst>(I)->getType();
+  int Idx = getMemoryAccessFuncIndex(OrigTy, DL);
   if (Idx < 0)
     return false;
   if (IsWrite && isVtableAccess(I)) {
@@ -440,7 +443,6 @@
   const unsigned Alignment = IsWrite
       ? cast<StoreInst>(I)->getAlignment()
       : cast<LoadInst>(I)->getAlignment();
-  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
   const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
   Value *OnAccessFunc = nullptr;
   if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0)
@@ -508,7 +510,7 @@
   IRBuilder<> IRB(I);
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     Value *Addr = LI->getPointerOperand();
-    int Idx = getMemoryAccessFuncIndex(Addr, DL);
+    int Idx = getMemoryAccessFuncIndex(LI->getType(), DL);
     if (Idx < 0)
       return false;
     const unsigned ByteSize = 1U << Idx;
@@ -522,7 +524,8 @@
 
   } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
     Value *Addr = SI->getPointerOperand();
-    int Idx = getMemoryAccessFuncIndex(Addr, DL);
+    Value *Val = SI->getValueOperand();
+    int Idx = getMemoryAccessFuncIndex(Val->getType(), DL);
     if (Idx < 0)
       return false;
     const unsigned ByteSize = 1U << Idx;
@@ -530,13 +533,14 @@
     Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
     Type *PtrTy = Ty->getPointerTo();
     Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
-                     IRB.CreateIntCast(SI->getValueOperand(), Ty, false),
+                     IRB.CreateIntCast(Val, Ty, false),
                      createOrdering(&IRB, SI->getOrdering())};
     CallInst *C = CallInst::Create(TsanAtomicStore[Idx], Args);
     ReplaceInstWithInst(I, C);
   } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) {
     Value *Addr = RMWI->getPointerOperand();
-    int Idx = getMemoryAccessFuncIndex(Addr, DL);
+    Value *Val = RMWI->getValOperand();
+    int Idx = getMemoryAccessFuncIndex(Val->getType(), DL);
     if (Idx < 0)
       return false;
     Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx];
@@ -547,13 +551,14 @@
     Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
     Type *PtrTy = Ty->getPointerTo();
     Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
-                     IRB.CreateIntCast(RMWI->getValOperand(), Ty, false),
+                     IRB.CreateIntCast(Val, Ty, false),
                      createOrdering(&IRB, RMWI->getOrdering())};
     CallInst *C = CallInst::Create(F, Args);
     ReplaceInstWithInst(I, C);
   } else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
     Value *Addr = CASI->getPointerOperand();
-    int Idx = getMemoryAccessFuncIndex(Addr, DL);
+    Value *CmpVal = CASI->getCompareOperand();
+    int Idx = getMemoryAccessFuncIndex(CmpVal->getType(), DL);
     if (Idx < 0)
       return false;
     const unsigned ByteSize = 1U << Idx;
@@ -561,12 +566,12 @@
     Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
     Type *PtrTy = Ty->getPointerTo();
     Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
-                     IRB.CreateIntCast(CASI->getCompareOperand(), Ty, false),
+                     IRB.CreateIntCast(CmpVal, Ty, false),
                      IRB.CreateIntCast(CASI->getNewValOperand(), Ty, false),
                      createOrdering(&IRB, CASI->getSuccessOrdering()),
                      createOrdering(&IRB, CASI->getFailureOrdering())};
     CallInst *C = IRB.CreateCall(TsanAtomicCAS[Idx], Args);
-    Value *Success = IRB.CreateICmpEQ(C, CASI->getCompareOperand());
+    Value *Success = IRB.CreateICmpEQ(C, CmpVal);
 
     Value *Res = IRB.CreateInsertValue(UndefValue::get(CASI->getType()), C, 0);
     Res = IRB.CreateInsertValue(Res, Success, 1);
@@ -583,10 +588,8 @@
   return true;
 }
 
-int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr,
+int ThreadSanitizer::getMemoryAccessFuncIndex(Type *OrigTy,
                                               const DataLayout &DL) {
-  Type *OrigPtrTy = Addr->getType();
-  Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
   assert(OrigTy->isSized());
   uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
   if (TypeSize != 8 && TypeSize != 16 &&
Index: lib/Transforms/Scalar/MemCpyOptimizer.cpp
===================================================================
--- lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -146,6 +146,9 @@
   /// range.
   Value *StartPtr;
 
+  /// DestTy - The type used for writing to the start of the range.
+  Type *DestTy;
+
   /// Alignment - The known alignment of the first store.
   unsigned Alignment;
 
@@ -221,18 +224,24 @@
   }
 
   void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
-    int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
+    Type *DestTy = SI->getValueOperand()->getType();
+    int64_t StoreSize = DL.getTypeStoreSize(DestTy);
 
     addRange(OffsetFromFirst, StoreSize,
-             SI->getPointerOperand(), SI->getAlignment(), SI);
+             SI->getPointerOperand(), DestTy,
+             SI->getAlignment(), SI);
   }
 
   void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
+    Type *DestTy = Type::getInt8Ty(MSI->getContext());
     int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
-    addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
+
+    addRange(OffsetFromFirst, Size, MSI->getDest(),
+             DestTy, MSI->getAlignment(), MSI);
   }
 
-  void addRange(int64_t Start, int64_t Size, Value *Ptr,
+  void addRange(int64_t Start, int64_t Size,
+                Value *Ptr, Type *DestTy,
                 unsigned Alignment, Instruction *Inst);
 
 };
@@ -243,7 +252,8 @@
 /// Add a new store to the MemsetRanges data structure.  This adds a
 /// new range for the specified store at the specified offset, merging into
 /// existing ranges as appropriate.
-void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
+void MemsetRanges::addRange(int64_t Start, int64_t Size,
+                            Value *Ptr, Type *DestTy,
                             unsigned Alignment, Instruction *Inst) {
   int64_t End = Start+Size;
 
@@ -258,6 +268,7 @@
     R.Start = Start;
     R.End = End;
     R.StartPtr = Ptr;
+    R.DestTy = DestTy;
     R.Alignment = Alignment;
     R.TheStores.push_back(Inst);
     return;
@@ -280,6 +291,7 @@
   if (Start < I->Start) {
     I->Start = Start;
     I->StartPtr = Ptr;
+    I->DestTy = DestTy;
     I->Alignment = Alignment;
   }
 
@@ -453,11 +465,8 @@
 
     // Determine alignment
     unsigned Alignment = Range.Alignment;
-    if (Alignment == 0) {
-      Type *EltType =
-        cast<PointerType>(StartPtr->getType())->getElementType();
-      Alignment = DL.getABITypeAlignment(EltType);
-    }
+    if (Alignment == 0)
+      Alignment = DL.getABITypeAlignment(Range.DestTy);
 
     AMemSet =
       Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
Index: lib/Transforms/Scalar/SROA.cpp
===================================================================
--- lib/Transforms/Scalar/SROA.cpp
+++ lib/Transforms/Scalar/SROA.cpp
@@ -1750,13 +1750,13 @@
     if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
         II->getIntrinsicID() != Intrinsic::lifetime_end)
       return false;
-  } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
-    // Disable vector promotion when there are loads or stores of an FCA.
-    return false;
   } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
     if (LI->isVolatile())
       return false;
     Type *LTy = LI->getType();
+    // Disable vector promotion when there are loads or stores of an FCA.
+    if (LTy->isStructTy())
+      return false;
     if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
       assert(LTy->isIntegerTy());
       LTy = SplitIntTy;
@@ -1767,6 +1767,9 @@
     if (SI->isVolatile())
       return false;
     Type *STy = SI->getValueOperand()->getType();
+    // Disable vector promotion when there are loads or stores of an FCA.
+    if (STy->isStructTy())
+      return false;
     if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
       assert(STy->isIntegerTy());
       STy = SplitIntTy;
Index: lib/Transforms/Scalar/Scalarizer.cpp
===================================================================
--- lib/Transforms/Scalar/Scalarizer.cpp
+++ lib/Transforms/Scalar/Scalarizer.cpp
@@ -49,7 +49,7 @@
   // insert them before BBI in BB.  If Cache is nonnull, use it to cache
   // the results.
   Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
-            ValueVector *cachePtr = nullptr);
+            Type *VecTy, ValueVector *cachePtr = nullptr);
 
   // Return component I, creating a new Value for it if necessary.
   Value *operator[](unsigned I);
@@ -161,7 +161,7 @@
   }
 
 private:
-  Scatterer scatter(Instruction *, Value *);
+  Scatterer scatter(Instruction *, Value *, Type *VecTy = nullptr);
   void gather(Instruction *, const ValueVector &);
   bool canTransferMetadata(unsigned Kind);
   void transferMetadata(Instruction *, const ValueVector &);
@@ -183,13 +183,10 @@
                 "Scalarize vector operations", false, false)
 
 Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
-                     ValueVector *cachePtr)
+                     Type *VecTy, ValueVector *cachePtr)
   : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
-  Type *Ty = V->getType();
-  PtrTy = dyn_cast<PointerType>(Ty);
-  if (PtrTy)
-    Ty = PtrTy->getElementType();
-  Size = Ty->getVectorNumElements();
+  PtrTy = dyn_cast<PointerType>(V->getType());
+  Size = VecTy->getVectorNumElements();
   if (!CachePtr)
     Tmp.resize(Size, nullptr);
   else if (CachePtr->empty())
@@ -268,24 +265,26 @@
 
 // Return a scattered form of V that can be accessed by Point.  V must be a
 // vector or a pointer to a vector.
-Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
+Scatterer Scalarizer::scatter(Instruction *Point, Value *V, Type *VecTy) {
+  if (!VecTy)
+    VecTy = V->getType();
   if (Argument *VArg = dyn_cast<Argument>(V)) {
     // Put the scattered form of arguments in the entry block,
     // so that it can be used everywhere.
     Function *F = VArg->getParent();
     BasicBlock *BB = &F->getEntryBlock();
-    return Scatterer(BB, BB->begin(), V, &Scattered[V]);
+    return Scatterer(BB, BB->begin(), V, VecTy, &Scattered[V]);
   }
   if (Instruction *VOp = dyn_cast<Instruction>(V)) {
     // Put the scattered form of an instruction directly after the
     // instruction.
     BasicBlock *BB = VOp->getParent();
     return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
-                     V, &Scattered[V]);
+                     V, VecTy, &Scattered[V]);
   }
   // In the fallback case, just put the scattered before Point and
   // keep the result local to Point.
-  return Scatterer(Point->getParent(), Point->getIterator(), V);
+  return Scatterer(Point->getParent(), Point->getIterator(), V, VecTy);
 }
 
 // Replace Op with the gathered form of the components in CV.  Defer the
@@ -601,7 +600,7 @@
   unsigned NumElems = Layout.VecTy->getNumElements();
   IRBuilder<> Builder(&LI);
-  Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
+  Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), LI.getType());
   ValueVector Res;
   Res.resize(NumElems);
 
@@ -620,13 +619,14 @@
 
   VectorLayout Layout;
   Value *FullValue = SI.getValueOperand();
-  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout,
+  Type *ValTy = FullValue->getType();
+  if (!getVectorLayout(ValTy, SI.getAlignment(), Layout,
                        SI.getModule()->getDataLayout()))
     return false;
 
   unsigned NumElems = Layout.VecTy->getNumElements();
   IRBuilder<> Builder(&SI);
-  Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
+  Scatterer Ptr = scatter(&SI, SI.getPointerOperand(), ValTy);
   Scatterer Val = scatter(&SI, FullValue);
 
   ValueVector Stores;
Index: lib/Transforms/Vectorize/BBVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/BBVectorize.cpp
+++ lib/Transforms/Vectorize/BBVectorize.cpp
@@ -610,7 +610,9 @@
     // after I; if OffsetInElmts == -1 then I accesses the memory
    // directly after J.
    bool getPairPtrInfo(Instruction *I, Instruction *J,
-                       Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment,
+                       Value *&IPtr, Value *&JPtr,
+                       Type *&ITy, Type *&JTy,
+                       unsigned &IAlignment, unsigned &JAlignment,
                        unsigned &IAddressSpace, unsigned &JAddressSpace,
                        int64_t &OffsetInElmts, bool ComputeOffset = true) {
      OffsetInElmts = 0;
      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        LoadInst *LJ = cast<LoadInst>(J);
        IPtr = LI->getPointerOperand();
        JPtr = LJ->getPointerOperand();
+       ITy = LI->getType();
+       JTy = LJ->getType();
        IAlignment = LI->getAlignment();
        JAlignment = LJ->getAlignment();
        IAddressSpace = LI->getPointerAddressSpace();
@@ -626,6 +630,8 @@
        StoreInst *SI = cast<StoreInst>(I), *SJ = cast<StoreInst>(J);
        IPtr = SI->getPointerOperand();
        JPtr = SJ->getPointerOperand();
+       ITy = SI->getValueOperand()->getType();
+       JTy = SJ->getValueOperand()->getType();
        IAlignment = SI->getAlignment();
        JAlignment = SJ->getAlignment();
        IAddressSpace = SI->getPointerAddressSpace();
@@ -647,12 +653,10 @@
        ConstantInt *IntOff = ConstOffSCEV->getValue();
        int64_t Offset = IntOff->getSExtValue();
        const DataLayout &DL = I->getModule()->getDataLayout();
-       Type *VTy = IPtr->getType()->getPointerElementType();
-       int64_t VTyTSS = (int64_t)DL.getTypeStoreSize(VTy);
+       int64_t VTyTSS = (int64_t)DL.getTypeStoreSize(ITy);
 
-       Type *VTy2 = JPtr->getType()->getPointerElementType();
-       if (VTy != VTy2 && Offset < 0) {
-         int64_t VTy2TSS = (int64_t)DL.getTypeStoreSize(VTy2);
+       if (ITy != JTy && Offset < 0) {
+         int64_t VTy2TSS = (int64_t)DL.getTypeStoreSize(JTy);
          OffsetInElmts = Offset/VTy2TSS;
          return (std::abs(Offset) % VTy2TSS) == 0;
        }
@@ -981,19 +985,16 @@
 
     if (IsSimpleLoadStore) {
       Value *IPtr, *JPtr;
+      Type *aTypeI, *aTypeJ;
       unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
       int64_t OffsetInElmts = 0;
-      if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
+      if (getPairPtrInfo(I, J, IPtr, JPtr, aTypeI, aTypeJ, IAlignment, JAlignment,
                          IAddressSpace, JAddressSpace, OffsetInElmts) &&
           std::abs(OffsetInElmts) == 1) {
         FixedOrder = (int) OffsetInElmts;
         unsigned BottomAlignment = IAlignment;
         if (OffsetInElmts < 0) BottomAlignment = JAlignment;
 
-        Type *aTypeI = isa<StoreInst>(I) ?
-          cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
-        Type *aTypeJ = isa<StoreInst>(J) ?
-          cast<StoreInst>(J)->getValueOperand()->getType() : J->getType();
         Type *VType = getVecTypeForPair(aTypeI, aTypeJ);
 
         if (Config.AlignedOnly) {
@@ -2302,20 +2303,19 @@
   Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context,
                      Instruction *I, Instruction *J, unsigned o) {
     Value *IPtr, *JPtr;
+    Type *ArgTypeI, *ArgTypeJ;
     unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
     int64_t OffsetInElmts;
 
     // Note: the analysis might fail here, that is why the pair order has
     // been precomputed (OffsetInElmts must be unused here).
-    (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
+    (void) getPairPtrInfo(I, J, IPtr, JPtr, ArgTypeI, ArgTypeJ,
+                          IAlignment, JAlignment,
                           IAddressSpace, JAddressSpace,
                           OffsetInElmts, false);
 
     // The pointer value is taken to be the one with the lowest offset.
     Value *VPtr = IPtr;
-
-    Type *ArgTypeI = IPtr->getType()->getPointerElementType();
-    Type *ArgTypeJ = JPtr->getType()->getPointerElementType();
     Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
 
     Type *VArgPtrType = PointerType::get(VArgType,
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1236,7 +1236,7 @@
   /// 0 - Stride is unknown or non-consecutive.
  /// 1 - Address is consecutive.
  /// -1 - Address is consecutive, and decreasing.
-  int isConsecutivePtr(Value *Ptr);
+  int isConsecutivePtr(Value *Ptr, Type *DataType = nullptr);
 
   /// Returns true if the value V is uniform within the loop.
   bool isUniform(Value *V);
@@ -1275,12 +1275,12 @@
   /// Returns true if the target machine supports masked store operation
   /// for the given \p DataType and kind of access to \p Ptr.
   bool isLegalMaskedStore(Type *DataType, Value *Ptr) {
-    return isConsecutivePtr(Ptr) && TTI->isLegalMaskedStore(DataType);
+    return isConsecutivePtr(Ptr, DataType) && TTI->isLegalMaskedStore(DataType);
   }
   /// Returns true if the target machine supports masked load operation
   /// for the given \p DataType and kind of access to \p Ptr.
   bool isLegalMaskedLoad(Type *DataType, Value *Ptr) {
-    return isConsecutivePtr(Ptr) && TTI->isLegalMaskedLoad(DataType);
+    return isConsecutivePtr(Ptr, DataType) && TTI->isLegalMaskedLoad(DataType);
   }
   /// Returns true if vector representation of the instruction \p I
   /// requires mask.
@@ -1984,11 +1984,11 @@
   return Builder.CreateAdd(Val, Step, "induction");
 }
 
-int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
+int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr, Type *DataType) {
   assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
   auto *SE = PSE.getSE();
   // Make sure that the pointer does not point to structs.
-  if (Ptr->getType()->getPointerElementType()->isAggregateType())
+  if (DataType && DataType->isAggregateType())
     return 0;
 
   // If this value is a pointer induction variable we know it is consecutive.
@@ -2010,8 +2010,7 @@
 
   if (Phi && Inductions.count(Phi)) {
     // Make sure that the pointer does not point to structs.
-    PointerType *GepPtrType = cast<PointerType>(GpPtr->getType());
-    if (GepPtrType->getElementType()->isAggregateType())
+    if (Gep->getSourceElementType()->isAggregateType())
       return 0;
 
     // Make sure that all of the index operands are loop invariant.
@@ -2381,7 +2380,7 @@
 
   // If the pointer is loop invariant or if it is non-consecutive,
   // scalarize the load.
-  int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+  int ConsecutiveStride = Legal->isConsecutivePtr(Ptr, ScalarDataTy);
   bool Reverse = ConsecutiveStride < 0;
   bool UniformLoad = LI && Legal->isUniform(Ptr);
   if (!ConsecutiveStride || UniformLoad)
@@ -4582,6 +4581,7 @@
     Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
     int Stride = isStridedPtr(PSE, Ptr, TheLoop, Strides);
+    Type *AccessTy = LI ? LI->getType() : SI->getValueOperand()->getType();
 
     // The factor of the corresponding interleave group.
     unsigned Factor = std::abs(Stride);
@@ -4591,13 +4591,12 @@
       continue;
 
     const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
-    PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
-    unsigned Size = DL.getTypeAllocSize(PtrTy->getElementType());
+    unsigned Size = DL.getTypeAllocSize(AccessTy);
 
     // An alignment of 0 means target ABI alignment.
     unsigned Align = LI ? LI->getAlignment() : SI->getAlignment();
     if (!Align)
-      Align = DL.getABITypeAlignment(PtrTy->getElementType());
+      Align = DL.getABITypeAlignment(AccessTy);
 
     StrideAccesses[I] = StrideDescriptor(Stride, Scev, Size, Align);
   }
@@ -5480,7 +5479,7 @@
   }
 
   // Scalarized loads/stores.
-  int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+  int ConsecutiveStride = Legal->isConsecutivePtr(Ptr, ValTy);
   bool Reverse = ConsecutiveStride < 0;
   const DataLayout &DL = I->getModule()->getDataLayout();
   unsigned ScalarAllocatedSize = DL.getTypeAllocSize(ValTy);
@@ -5623,12 +5622,14 @@
 
 bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
   // Check for a store.
-  if (StoreInst *ST = dyn_cast<StoreInst>(Inst))
-    return Legal->isConsecutivePtr(ST->getPointerOperand()) != 0;
+  if (StoreInst *ST = dyn_cast<StoreInst>(Inst)) {
+    Type *Ty = ST->getValueOperand()->getType();
+    return Legal->isConsecutivePtr(ST->getPointerOperand(), Ty) != 0;
+  }
 
   // Check for a load.
   if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
-    return Legal->isConsecutivePtr(LI->getPointerOperand()) != 0;
+    return Legal->isConsecutivePtr(LI->getPointerOperand(), LI->getType()) != 0;
 
   return false;
 }
Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -442,6 +442,10 @@
   /// \returns NULL if this is not a valid Load/Store instruction.
   static Value *getPointerOperand(Value *I);
 
+  /// \brief Take the accessed type from the Load/Store instruction.
+  /// \returns NULL if this is not a valid Load/Store instruction.
+  static Type *getAccessType(Value *I);
+
   /// \brief Take the address space operand from the Load/Store instruction.
   /// \returns -1 if this is not a valid Load/Store instruction.
   static unsigned getAddressSpaceOperand(Value *I);
@@ -1845,6 +1849,14 @@
   return nullptr;
 }
 
+Type *BoUpSLP::getAccessType(Value *I) {
+  if (LoadInst *LI = dyn_cast<LoadInst>(I))
+    return LI->getType();
+  if (StoreInst *SI = dyn_cast<StoreInst>(I))
+    return SI->getValueOperand()->getType();
+  return nullptr;
+}
+
 unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
   if (LoadInst *L = dyn_cast<LoadInst>(I))
     return L->getPointerAddressSpace();
@@ -1864,11 +1876,13 @@
     return false;
 
   // Make sure that A and B are different pointers of the same type.
-  if (PtrA == PtrB || PtrA->getType() != PtrB->getType())
+  Type *TyA = getAccessType(A);
+  Type *TyB = getAccessType(B);
+  if (PtrA == PtrB || TyA != TyB)
     return false;
 
   unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
-  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
+  Type *Ty = TyA;
   APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
 
   APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
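
Every hunk above follows the same pattern: instead of asking the pointer operand's type for its pointee type, the accessed type is taken from the memory instruction itself (the load's result type or the store's value operand type). A minimal sketch of that pattern against the LLVM C++ API; the helper name getLoadStoreAccessedType is illustrative only and is not introduced by this patch.

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Illustrative helper (not part of this patch): derive the accessed type
// directly from the memory instruction rather than from the pointer type.
static Type *getLoadStoreAccessedType(Instruction *I) {
  if (auto *LI = dyn_cast<LoadInst>(I))
    return LI->getType();                    // type produced by the load
  if (auto *SI = dyn_cast<StoreInst>(I))
    return SI->getValueOperand()->getType(); // type of the stored value
  return nullptr;                            // not a simple load/store
}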