Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -522,8 +522,7 @@
   /// instructions may be created to extract the result from the given intrinsic
   /// memory operation. Returns nullptr if the target cannot create a result
   /// from the given intrinsic.
-  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                           Type *ExpectedType) const;
+  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst) const;
 
   /// \returns True if the two functions have compatible attributes for inlining
   /// purposes.
@@ -630,8 +629,7 @@
   virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
   virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                                   MemIntrinsicInfo &Info) = 0;
-  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                                   Type *ExpectedType) = 0;
+  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst) = 0;
   virtual bool areInlineCompatible(const Function *Caller,
                                    const Function *Callee) const = 0;
 };
@@ -822,9 +820,8 @@
                           MemIntrinsicInfo &Info) override {
     return Impl.getTgtMemIntrinsic(Inst, Info);
   }
-  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                           Type *ExpectedType) override {
-    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
+  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst) override {
+    return Impl.getOrCreateResultFromMemIntrinsic(Inst);
   }
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const override {
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -333,8 +333,7 @@
     return false;
   }
 
-  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                           Type *ExpectedType) {
+  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst) {
     return nullptr;
   }
 
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -332,8 +332,8 @@
 }
 
 Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
-    IntrinsicInst *Inst, Type *ExpectedType) const {
-  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
+    IntrinsicInst *Inst) const {
+  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst);
 }
 
 bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
Index: lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -119,8 +119,7 @@
   void getUnrollingPreferences(Loop *L,
                                TTI::UnrollingPreferences &UP);
 
-  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                           Type *ExpectedType);
+  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst);
 
   bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                           MemIntrinsicInfo &Info);
Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -492,26 +492,25 @@
   UP.PartialOptSizeThreshold = 0;
 }
 
-Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                                         Type *ExpectedType) {
+Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst) {
   switch (Inst->getIntrinsicID()) {
   default:
     return nullptr;
   case Intrinsic::aarch64_neon_st2:
   case Intrinsic::aarch64_neon_st3:
   case Intrinsic::aarch64_neon_st4: {
-    // Create a struct type
-    StructType *ST = dyn_cast<StructType>(ExpectedType);
-    if (!ST)
-      return nullptr;
     unsigned NumElts = Inst->getNumArgOperands() - 1;
-    if (ST->getNumElements() != NumElts)
-      return nullptr;
-    for (unsigned i = 0, e = NumElts; i != e; ++i) {
-      if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
-        return nullptr;
-    }
-    Value *Res = UndefValue::get(ExpectedType);
+    SmallVector<Type *, 4> ElementTypes;
+    ElementTypes.reserve(NumElts);
+    for (unsigned i = 0; i < NumElts; ++i)
+      ElementTypes.push_back(Inst->getArgOperand(i)->getType());
+    auto *ST = StructType::get(Inst->getContext(), ElementTypes);
+
+    // FIXME: The need to generate new IR here is unfortunate. One consequence
+    // of this is that EarlyCSE's trivial writeback DSE will never kick in
+    // since the resulting value will never be pointer equivalent to an
+    // existing one.
+    Value *Res = UndefValue::get(ST);
     IRBuilder<> Builder(Inst);
     for (unsigned i = 0, e = NumElts; i != e; ++i) {
       Value *L = Inst->getArgOperand(i);
@@ -522,9 +521,7 @@
   }
   case Intrinsic::aarch64_neon_ld2:
   case Intrinsic::aarch64_neon_ld3:
   case Intrinsic::aarch64_neon_ld4:
-    if (Inst->getType() == ExpectedType)
-      return Inst;
-    return nullptr;
+    return Inst;
   }
 }
Index: lib/Transforms/Scalar/EarlyCSE.cpp
===================================================================
--- lib/Transforms/Scalar/EarlyCSE.cpp
+++ lib/Transforms/Scalar/EarlyCSE.cpp
@@ -496,14 +496,19 @@
   };
 
   bool processNode(DomTreeNode *Node);
 
-  Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
+  /// Return the value being stored to or loaded from memory. Note that the
+  /// result type may differ from the pointee type of the pointer operand.
+  /// This is already true of target intrinsics, but will become true for
+  /// normal stores and loads as well once typeless pointers are pushed through
+  /// the IR. Also note that this routine may end up generating new IR
+  /// immediately before 'Inst' (for target intrinsics).
+  Value *getOrCreateResult(Value *Inst) const {
     if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
       return LI;
     else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
       return SI->getValueOperand();
-    assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
-    return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
-                                                 ExpectedType);
+    auto *II = cast<IntrinsicInst>(Inst);
+    return TTI.getOrCreateResultFromMemIntrinsic(II);
   }
 };
 }
@@ -623,8 +628,8 @@
         !MemInst.isVolatile() && MemInst.isUnordered() &&
         // We can't replace an atomic load with one which isn't also atomic.
         InVal.IsAtomic >= MemInst.isAtomic()) {
-      Value *Op = getOrCreateResult(InVal.Data, Inst->getType());
-      if (Op != nullptr) {
+      Value *Op = getOrCreateResult(InVal.Data);
+      if (Op != nullptr && Op->getType() == Inst->getType()) {
         DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
                      << "  to: " << *InVal.Data << '\n');
         if (!Inst->use_empty())
@@ -695,7 +700,7 @@
     if (MemInst.isValid() && MemInst.isStore()) {
       LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
       if (InVal.Data &&
-          InVal.Data == getOrCreateResult(Inst, InVal.Data->getType()) &&
+          InVal.Data == getOrCreateResult(Inst) &&
           InVal.Generation == CurrentGeneration &&
           InVal.MatchingId == MemInst.getMatchingId() &&
           // We don't yet handle removing stores with ordering of any kind.