Index: include/llvm/Transforms/Utils/Local.h =================================================================== --- include/llvm/Transforms/Utils/Local.h +++ include/llvm/Transforms/Utils/Local.h @@ -380,6 +380,14 @@ /// during lowering by the GC infrastructure. bool callsGCLeafFunction(ImmutableCallSite CS); +/// Copy metadata from load instructions between load instructions. +/// +/// sameValue - copy metadata about the value (!nonnull, !range) +/// sameAddress - copy metadata about the address (!noalias, !invariant, etc.) +void copyLoadMetadata(const DataLayout &DL, + LoadInst &NewLI, const LoadInst &OldLI, + bool sameValue, bool sameAddress); + //===----------------------------------------------------------------------===// // Intrinsic pattern matching // Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -439,81 +439,15 @@ const Twine &Suffix = "") { assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) && "can't fold an atomic load to requested type"); - + Value *Ptr = LI.getPointerOperand(); unsigned AS = LI.getPointerAddressSpace(); - SmallVector<std::pair<unsigned, MDNode *>, 8> MD; - LI.getAllMetadata(MD); LoadInst *NewLoad = IC.Builder->CreateAlignedLoad( IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)), LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix); NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); - MDBuilder MDB(NewLoad->getContext()); - for (const auto &MDPair : MD) { - unsigned ID = MDPair.first; - MDNode *N = MDPair.second; - // Note, essentially every kind of metadata should be preserved here! This - // routine is supposed to clone a load instruction changing *only its type*. - // The only metadata it makes sense to drop is metadata which is invalidated - // when the pointer type changes.
This should essentially never be the case - // in LLVM, but we explicitly switch over only known metadata to be - // conservatively correct. If you are adding metadata to LLVM which pertains - // to loads, you almost certainly want to add it here. - switch (ID) { - case LLVMContext::MD_dbg: - case LLVMContext::MD_tbaa: - case LLVMContext::MD_prof: - case LLVMContext::MD_fpmath: - case LLVMContext::MD_tbaa_struct: - case LLVMContext::MD_invariant_load: - case LLVMContext::MD_alias_scope: - case LLVMContext::MD_noalias: - case LLVMContext::MD_nontemporal: - case LLVMContext::MD_mem_parallel_loop_access: - // All of these directly apply. - NewLoad->setMetadata(ID, N); - break; - - case LLVMContext::MD_nonnull: - // This only directly applies if the new type is also a pointer. - if (NewTy->isPointerTy()) { - NewLoad->setMetadata(ID, N); - break; - } - // If it's integral now, translate it to !range metadata. - if (NewTy->isIntegerTy()) { - auto *ITy = cast<IntegerType>(NewTy); - auto *NullInt = ConstantExpr::getPtrToInt( - ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy); - auto *NonNullInt = - ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1)); - NewLoad->setMetadata(LLVMContext::MD_range, - MDB.createRange(NonNullInt, NullInt)); - } - break; - case LLVMContext::MD_align: - case LLVMContext::MD_dereferenceable: - case LLVMContext::MD_dereferenceable_or_null: - // These only directly apply if the new type is also a pointer. - if (NewTy->isPointerTy()) - NewLoad->setMetadata(ID, N); - break; - case LLVMContext::MD_range: - // FIXME: It would be nice to propagate this in some way, but the type - // conversions make it hard. - - // If it's a pointer now and the range does not contain 0, make it !nonnull.
- if (NewTy->isPointerTy()) { - unsigned BitWidth = IC.getDataLayout().getTypeSizeInBits(NewTy); - if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) { - MDNode *NN = MDNode::get(LI.getContext(), None); - NewLoad->setMetadata(LLVMContext::MD_nonnull, NN); - } - } - break; - } - } + copyLoadMetadata(IC.getDataLayout(), *NewLoad, LI, true, true); return NewLoad; } @@ -523,7 +457,7 @@ static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) { assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) && "can't fold an atomic store of requested type"); - + Value *Ptr = SI.getPointerOperand(); unsigned AS = SI.getPointerAddressSpace(); SmallVector<std::pair<unsigned, MDNode *>, 8> MD; Index: lib/Transforms/Scalar/SROA.cpp =================================================================== --- lib/Transforms/Scalar/SROA.cpp +++ lib/Transforms/Scalar/SROA.cpp @@ -2394,13 +2394,12 @@ NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope()); // Try to preserve nonnull metadata - if (TargetTy->isPointerTy()) - NewLI->copyMetadata(LI, LLVMContext::MD_nonnull); V = NewLI; // If this is an integer load past the end of the slice (which means the // bytes outside the slice are undef or this load is dead) just forcibly // fix the integer size with correct handling of endianness.
+ bool DidZext = false; if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy)) if (auto *TITy = dyn_cast<IntegerType>(TargetTy)) if (AITy->getBitWidth() < TITy->getBitWidth()) { @@ -2408,7 +2407,11 @@ if (DL.isBigEndian()) V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(), "endian_shift"); + DidZext = true; } + + if (!DidZext) + copyLoadMetadata(DL, *NewLI, LI, true, false); } else { Type *LTy = TargetTy->getPointerTo(AS); LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy), @@ -3578,7 +3581,7 @@ PartPtrTy, BasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); - PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); + PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); // Append this load onto the list of split loads so we can find it later // to rewrite the stores. Index: lib/Transforms/Utils/Local.cpp =================================================================== --- lib/Transforms/Utils/Local.cpp +++ lib/Transforms/Utils/Local.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" @@ -1081,7 +1082,7 @@ } /// See if there is a dbg.value intrinsic for DIVar for the PHI node. -static bool PhiHasDebugValue(DILocalVariable *DIVar, +static bool PhiHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr, PHINode *APN) { // Since we can't guarantee that the original dbg.declare instrinsic @@ -1159,7 +1160,7 @@ DbgValue->insertAfter(LI); } -/// Inserts a llvm.dbg.value intrinsic after a phi +/// Inserts a llvm.dbg.value intrinsic after a phi /// that has an associated llvm.dbg.decl intrinsic. void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, PHINode *APN, DIBuilder &Builder) { @@ -1742,12 +1743,12 @@ // Preserve !invariant.group in K.
break; case LLVMContext::MD_align: - K->setMetadata(Kind, + K->setMetadata(Kind, MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); break; case LLVMContext::MD_dereferenceable: case LLVMContext::MD_dereferenceable_or_null: - K->setMetadata(Kind, + K->setMetadata(Kind, MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); break; } @@ -1847,6 +1848,89 @@ return false; } +void llvm::copyLoadMetadata(const DataLayout &DL, + LoadInst &NewLoad, const LoadInst &OldLoad, + bool sameValue, bool sameAddress) { + SmallVector<std::pair<unsigned, MDNode *>, 8> MD; + OldLoad.getAllMetadata(MD); + MDBuilder MDB(NewLoad.getContext()); + Type *NewTy = NewLoad.getType(); + const Value *Ptr = OldLoad.getPointerOperand(); + + for (const auto &MDPair : MD) { + unsigned ID = MDPair.first; + MDNode *N = MDPair.second; + // Note, essentially every kind of metadata should be preserved here! This + // routine is supposed to clone a load instruction changing *only its type*. + // The only metadata it makes sense to drop is metadata which is invalidated + // when the pointer type changes. This should essentially never be the case + // in LLVM, but we explicitly switch over only known metadata to be + // conservatively correct. If you are adding metadata to LLVM which pertains + // to loads, you almost certainly want to add it here. + switch (ID) { + case LLVMContext::MD_dbg: + case LLVMContext::MD_tbaa: + case LLVMContext::MD_prof: + case LLVMContext::MD_fpmath: + case LLVMContext::MD_tbaa_struct: + case LLVMContext::MD_invariant_load: + case LLVMContext::MD_alias_scope: + case LLVMContext::MD_noalias: + case LLVMContext::MD_nontemporal: + case LLVMContext::MD_mem_parallel_loop_access: + if (sameAddress) { + // All of these apply to the address.
+ NewLoad.setMetadata(ID, N); + } + break; + + case LLVMContext::MD_nonnull: + if (sameValue) { + if (NewTy->isPointerTy()) { + // If it's a pointer, copy the !nonnull metadata + NewLoad.setMetadata(ID, N); + } else if (NewTy->isIntegerTy()) { + // If it's integral now, translate it to !range metadata. + auto *ITy = cast<IntegerType>(NewTy); + auto *NullInt = ConstantExpr::getPtrToInt( + ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy); + auto *NonNullInt = + ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1)); + NewLoad.setMetadata(LLVMContext::MD_range, + MDB.createRange(NonNullInt, NullInt)); + } + } + break; + case LLVMContext::MD_align: + case LLVMContext::MD_dereferenceable: + case LLVMContext::MD_dereferenceable_or_null: + if (sameValue) { + // These only directly apply if the new type is also a pointer. + if (NewTy->isPointerTy()) + NewLoad.setMetadata(ID, N); + } + break; + case LLVMContext::MD_range: + if (sameValue) { + if (NewTy == OldLoad.getType()) { + NewLoad.setMetadata(ID, N); + } else if (NewTy->isPointerTy()) { + // If it's a pointer now and the range does not contain 0, make it !nonnull + unsigned BitWidth = DL.getTypeSizeInBits(NewTy); + if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) { + MDNode *NN = MDNode::get(OldLoad.getContext(), None); + NewLoad.setMetadata(LLVMContext::MD_nonnull, NN); + } + } else { + // FIXME: It would be nice to propagate this in some way, but the type + // conversions make it hard. + } + } + break; + } + } +} + namespace { /// A potential constituent of a bitreverse or bswap expression. See + /// collectBitParts for a fuller explanation.
@@ -1968,7 +2052,7 @@ unsigned NumMaskedBits = AndMask.countPopulation(); if (!MatchBitReversals && NumMaskedBits % 8 != 0) return Result; - + auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, MatchBitReversals, BPS); if (!Res) Index: test/Transforms/SROA/pr32902.ll =================================================================== --- /dev/null +++ test/Transforms/SROA/pr32902.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -sroa -S | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux" + + + +%foo = type { %bar* } +%bar = type { [4 x i8] } + +declare void @foo(i64* noalias nocapture) + +; Make sure we properly handle the !nonnull attribute when we +; convert a pointer load to an integer load. +; CHECK-LABEL: @_ZN4core3fmt9Formatter12pad_integral17h1dcf0f409406b6e5E() +; CHECK-NEXT: start: +; CHECK-NEXT: %0 = inttoptr i64 0 to %bar* +; CHECK-NEXT: ret %bar* %0 +define %bar* @_ZN4core3fmt9Formatter12pad_integral17h1dcf0f409406b6e5E() { +start: + %arg4.i = alloca %foo, align 8 + + %0 = bitcast %foo* %arg4.i to i64* + store i64 0, i64* %0, align 8 + + %1 = getelementptr inbounds %foo, %foo* %arg4.i, i64 0, i32 0 + %2 = load %bar*, %bar** %1, align 8, !nonnull !0 + ret %bar* %2 +} + +!0 = !{}