Index: include/llvm/Transforms/Utils/VNCoercion.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Utils/VNCoercion.h @@ -0,0 +1,84 @@ +//===- VNCoercion.h - Value Numbering Coercion Utilities --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_VNCOERCION_H +#define LLVM_TRANSFORMS_UTILS_VNCOERCION_H +#include "llvm/IR/IRBuilder.h" + +namespace llvm { +class Function; +class StoreInst; +class LoadInst; +class MemIntrinsic; +class Instruction; +class Value; +class Type; +class DataLayout; +namespace VNCoercion { +/// Return true if CoerceAvailableValueToLoadType would succeed if it was +/// called. +bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, + const DataLayout &DL); + +/// If we saw a store of a value to memory, and then a load from a must-aliased +/// pointer of a different type, try to coerce the stored value to the loaded +/// type. LoadedTy is the type of the load we want to replace. IRB is +/// IRBuilder used to insert new instructions. +/// +/// If we can't do it, return null. +Value *CoerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy, + IRBuilder<> &IRB, const DataLayout &DL); + +/// This function determines whether a value for the pointer LoadPtr can be +/// extracted from the store at DepSI. +/// +/// On success, it returns the offset into DepSI that extraction would start. +/// On failure, it returns -1. +int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, + StoreInst *DepSI); + +/// This function determines whether a value for the pointer LoadPtr can be +/// extracted from the load at DepLI. +/// +/// On success, it returns the offset into DepLI that extraction would start. 
+/// On failure, it returns -1. +int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI, + const DataLayout &DL); + +/// This function determines whether a value for the pointer LoadPtr can be +/// extracted from the memory intrinsic at DepMI. +/// +/// On success, it returns the offset into DepMI that extraction would start. +/// On failure, it returns -1. +int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, + MemIntrinsic *DepMI, const DataLayout &DL); + +/// If AnalyzeLoadFromClobberingStore returned an offset, this function can be +/// used to actually perform the extraction of the bits from the store. It +/// inserts instructions to do so at InsertPt, and returns the extracted value. +Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy, + Instruction *InsertPt, const DataLayout &DL); + +/// If AnalyzeLoadFromClobberingLoad returned an offset, this function can be +/// used to actually perform the extraction of the bits from the load, including +/// any necessary load widening. It inserts instructions to do so at InsertPt, +/// and returns the extracted value. +Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy, + Instruction *InsertPt); + +/// If AnalyzeLoadFromClobberingMemInst returned an offset, this function can be +/// used to actually perform the extraction of the bits from the memory +/// intrinsic. It inserts instructions to do so at InsertPt, and returns the +/// extracted value. 
+Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, + Type *LoadTy, Instruction *InsertPt, + const DataLayout &DL); +} +} +#endif Index: lib/Transforms/Scalar/GVN.cpp =================================================================== --- lib/Transforms/Scalar/GVN.cpp +++ lib/Transforms/Scalar/GVN.cpp @@ -36,7 +36,6 @@ #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalVariable.h" @@ -51,9 +50,12 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/VNCoercion.h" + #include <vector> using namespace llvm; using namespace llvm::gvn; +using namespace llvm::VNCoercion; using namespace PatternMatch; #define DEBUG_TYPE "gvn" @@ -692,442 +694,6 @@ } -/// Return true if CoerceAvailableValueToLoadType will succeed. -static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, - Type *LoadTy, - const DataLayout &DL) { - // If the loaded or stored value is an first class array or struct, don't try - // to transform them. We need to be able to bitcast to integer. - if (LoadTy->isStructTy() || LoadTy->isArrayTy() || - StoredVal->getType()->isStructTy() || - StoredVal->getType()->isArrayTy()) - return false; - - // The store has to be at least as big as the load. - if (DL.getTypeSizeInBits(StoredVal->getType()) < - DL.getTypeSizeInBits(LoadTy)) - return false; - - return true; -} - -/// If we saw a store of a value to memory, and -/// then a load from a must-aliased pointer of a different type, try to coerce -/// the stored value. LoadedTy is the type of the load we want to replace. -/// IRB is IRBuilder used to insert new instructions. -/// -/// If we can't do it, return null. 
-static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy, - IRBuilder<> &IRB, - const DataLayout &DL) { - assert(CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) && - "precondition violation - materialization can't fail"); - - if (auto *C = dyn_cast<Constant>(StoredVal)) - if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL)) - StoredVal = FoldedStoredVal; - - // If this is already the right type, just return it. - Type *StoredValTy = StoredVal->getType(); - - uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy); - uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy); - - // If the store and reload are the same size, we can always reuse it. - if (StoredValSize == LoadedValSize) { - // Pointer to Pointer -> use bitcast. - if (StoredValTy->getScalarType()->isPointerTy() && - LoadedTy->getScalarType()->isPointerTy()) { - StoredVal = IRB.CreateBitCast(StoredVal, LoadedTy); - } else { - // Convert source pointers to integers, which can be bitcast. - if (StoredValTy->getScalarType()->isPointerTy()) { - StoredValTy = DL.getIntPtrType(StoredValTy); - StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy); - } - - Type *TypeToCastTo = LoadedTy; - if (TypeToCastTo->getScalarType()->isPointerTy()) - TypeToCastTo = DL.getIntPtrType(TypeToCastTo); - - if (StoredValTy != TypeToCastTo) - StoredVal = IRB.CreateBitCast(StoredVal, TypeToCastTo); - - // Cast to pointer if the load needs a pointer type. - if (LoadedTy->getScalarType()->isPointerTy()) - StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy); - } - - if (auto *C = dyn_cast<Constant>(StoredVal)) - if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL)) - StoredVal = FoldedStoredVal; - - return StoredVal; - } - - // If the loaded value is smaller than the available value, then we can - // extract out a piece from it. If the available value is too small, then we - // can't do anything. 
- assert(StoredValSize >= LoadedValSize && - "CanCoerceMustAliasedValueToLoad fail"); - - // Convert source pointers to integers, which can be manipulated. - if (StoredValTy->getScalarType()->isPointerTy()) { - StoredValTy = DL.getIntPtrType(StoredValTy); - StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy); - } - - // Convert vectors and fp to integer, which can be manipulated. - if (!StoredValTy->isIntegerTy()) { - StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize); - StoredVal = IRB.CreateBitCast(StoredVal, StoredValTy); - } - - // If this is a big-endian system, we need to shift the value down to the low - // bits so that a truncate will work. - if (DL.isBigEndian()) { - uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) - - DL.getTypeStoreSizeInBits(LoadedTy); - StoredVal = IRB.CreateLShr(StoredVal, ShiftAmt, "tmp"); - } - - // Truncate the integer to the right size now. - Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize); - StoredVal = IRB.CreateTrunc(StoredVal, NewIntTy, "trunc"); - - if (LoadedTy != NewIntTy) { - // If the result is a pointer, inttoptr. - if (LoadedTy->getScalarType()->isPointerTy()) - StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy, "inttoptr"); - else - // Otherwise, bitcast. - StoredVal = IRB.CreateBitCast(StoredVal, LoadedTy, "bitcast"); - } - - if (auto *C = dyn_cast<Constant>(StoredVal)) - if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL)) - StoredVal = FoldedStoredVal; - - return StoredVal; -} - -/// This function is called when we have a -/// memdep query of a load that ends up being a clobbering memory write (store, -/// memset, memcpy, memmove). This means that the write *may* provide bits used -/// by the load but we can't be sure because the pointers don't mustalias. -/// -/// Check this case to see if there is anything more we can do before we give -/// up. This returns -1 if we have to give up, or a byte number in the stored -/// value of the piece that feeds the load. 
-static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, - Value *WritePtr, - uint64_t WriteSizeInBits, - const DataLayout &DL) { - // If the loaded or stored value is a first class array or struct, don't try - // to transform them. We need to be able to bitcast to integer. - if (LoadTy->isStructTy() || LoadTy->isArrayTy()) - return -1; - - int64_t StoreOffset = 0, LoadOffset = 0; - Value *StoreBase = - GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL); - Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL); - if (StoreBase != LoadBase) - return -1; - - // If the load and store are to the exact same address, they should have been - // a must alias. AA must have gotten confused. - // FIXME: Study to see if/when this happens. One case is forwarding a memset - // to a load from the base of the memset. - - // If the load and store don't overlap at all, the store doesn't provide - // anything to the load. In this case, they really don't alias at all, AA - // must have gotten confused. - uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy); - - if ((WriteSizeInBits & 7) | (LoadSize & 7)) - return -1; - uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes. - LoadSize /= 8; - - - bool isAAFailure = false; - if (StoreOffset < LoadOffset) - isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset; - else - isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset; - - if (isAAFailure) - return -1; - - // If the Load isn't completely contained within the stored bits, we don't - // have all the bits to feed it. We could do something crazy in the future - // (issue a smaller load then merge the bits in) but this seems unlikely to be - // valuable. - if (StoreOffset > LoadOffset || - StoreOffset+StoreSize < LoadOffset+LoadSize) - return -1; - - // Okay, we can do this transformation. Return the number of bytes into the - // store that the load is. 
- return LoadOffset-StoreOffset; -} - -/// This function is called when we have a -/// memdep query of a load that ends up being a clobbering store. -static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, - StoreInst *DepSI) { - // Cannot handle reading from store of first-class aggregate yet. - if (DepSI->getValueOperand()->getType()->isStructTy() || - DepSI->getValueOperand()->getType()->isArrayTy()) - return -1; - - const DataLayout &DL = DepSI->getModule()->getDataLayout(); - Value *StorePtr = DepSI->getPointerOperand(); - uint64_t StoreSize =DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()); - return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, - StorePtr, StoreSize, DL); -} - -/// This function is called when we have a -/// memdep query of a load that ends up being clobbered by another load. See if -/// the other load can feed into the second load. -static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, - LoadInst *DepLI, const DataLayout &DL){ - // Cannot handle reading from store of first-class aggregate yet. - if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) - return -1; - - Value *DepPtr = DepLI->getPointerOperand(); - uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()); - int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL); - if (R != -1) return R; - - // If we have a load/load clobber an DepLI can be widened to cover this load, - // then we should widen it! 
- int64_t LoadOffs = 0; - const Value *LoadBase = - GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL); - unsigned LoadSize = DL.getTypeStoreSize(LoadTy); - - unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize( - LoadBase, LoadOffs, LoadSize, DepLI); - if (Size == 0) return -1; - - // Check non-obvious conditions enforced by MDA which we rely on for being - // able to materialize this potentially available value - assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!"); - assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load"); - - return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, DL); -} - - - -static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, - MemIntrinsic *MI, - const DataLayout &DL) { - // If the mem operation is a non-constant size, we can't handle it. - ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength()); - if (!SizeCst) return -1; - uint64_t MemSizeInBits = SizeCst->getZExtValue()*8; - - // If this is memset, we just need to see if the offset is valid in the size - // of the memset.. - if (MI->getIntrinsicID() == Intrinsic::memset) - return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), - MemSizeInBits, DL); - - // If we have a memcpy/memmove, the only case we can handle is if this is a - // copy from constant memory. In that case, we can read directly from the - // constant memory. - MemTransferInst *MTI = cast<MemTransferInst>(MI); - - Constant *Src = dyn_cast<Constant>(MTI->getSource()); - if (!Src) return -1; - - GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL)); - if (!GV || !GV->isConstant()) return -1; - - // See if the access is within the bounds of the transfer. 
- int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, - MI->getDest(), MemSizeInBits, DL); - if (Offset == -1) - return Offset; - - unsigned AS = Src->getType()->getPointerAddressSpace(); - // Otherwise, see if we can constant fold a load from the constant with the - // offset applied as appropriate. - Src = ConstantExpr::getBitCast(Src, - Type::getInt8PtrTy(Src->getContext(), AS)); - Constant *OffsetCst = - ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, - OffsetCst); - Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); - if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL)) - return Offset; - return -1; -} - - -/// This function is called when we have a -/// memdep query of a load that ends up being a clobbering store. This means -/// that the store provides bits used by the load but we the pointers don't -/// mustalias. Check this case to see if there is anything more we can do -/// before we give up. -static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, - Type *LoadTy, - Instruction *InsertPt, const DataLayout &DL){ - LLVMContext &Ctx = SrcVal->getType()->getContext(); - - uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8; - uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8; - - IRBuilder<> Builder(InsertPt); - - // Compute which bits of the stored value are being used by the load. Convert - // to an integer type to start with. - if (SrcVal->getType()->getScalarType()->isPointerTy()) - SrcVal = Builder.CreatePtrToInt(SrcVal, - DL.getIntPtrType(SrcVal->getType())); - if (!SrcVal->getType()->isIntegerTy()) - SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8)); - - // Shift the bits to the least significant depending on endianness. 
- unsigned ShiftAmt; - if (DL.isLittleEndian()) - ShiftAmt = Offset*8; - else - ShiftAmt = (StoreSize-LoadSize-Offset)*8; - - if (ShiftAmt) - SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt); - - if (LoadSize != StoreSize) - SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8)); - - return CoerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL); -} - -/// This function is called when we have a -/// memdep query of a load that ends up being a clobbering load. This means -/// that the load *may* provide bits used by the load but we can't be sure -/// because the pointers don't mustalias. Check this case to see if there is -/// anything more we can do before we give up. -static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, - Type *LoadTy, Instruction *InsertPt, - GVN &gvn) { - const DataLayout &DL = SrcVal->getModule()->getDataLayout(); - // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to - // widen SrcVal out to a larger load. - unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType()); - unsigned LoadSize = DL.getTypeStoreSize(LoadTy); - if (Offset+LoadSize > SrcValStoreSize) { - assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!"); - assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load"); - // If we have a load/load clobber an DepLI can be widened to cover this - // load, then we should widen it to the next power of 2 size big enough! - unsigned NewLoadSize = Offset+LoadSize; - if (!isPowerOf2_32(NewLoadSize)) - NewLoadSize = NextPowerOf2(NewLoadSize); - - Value *PtrVal = SrcVal->getPointerOperand(); - - // Insert the new load after the old load. This ensures that subsequent - // memdep queries will find the new load. We can't easily remove the old - // load completely because it is already in the value numbering table. 
- IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); - Type *DestPTy = - IntegerType::get(LoadTy->getContext(), NewLoadSize*8); - DestPTy = PointerType::get(DestPTy, - PtrVal->getType()->getPointerAddressSpace()); - Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc()); - PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); - LoadInst *NewLoad = Builder.CreateLoad(PtrVal); - NewLoad->takeName(SrcVal); - NewLoad->setAlignment(SrcVal->getAlignment()); - - DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); - DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); - - // Replace uses of the original load with the wider load. On a big endian - // system, we need to shift down to get the relevant bits. - Value *RV = NewLoad; - if (DL.isBigEndian()) - RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8); - RV = Builder.CreateTrunc(RV, SrcVal->getType()); - SrcVal->replaceAllUsesWith(RV); - - // We would like to use gvn.markInstructionForDeletion here, but we can't - // because the load is already memoized into the leader map table that GVN - // tracks. It is potentially possible to remove the load from the table, - // but then there all of the operations based on it would need to be - // rehashed. Just leave the dead load around. - gvn.getMemDep().removeInstruction(SrcVal); - SrcVal = NewLoad; - } - - return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL); -} - - -/// This function is called when we have a -/// memdep query of a load that ends up being a clobbering mem intrinsic. -static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, - Type *LoadTy, Instruction *InsertPt, - const DataLayout &DL){ - LLVMContext &Ctx = LoadTy->getContext(); - uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy)/8; - - IRBuilder<> Builder(InsertPt); - - // We know that this method is only called when the mem transfer fully - // provides the bits for the load. 
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) { - // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and - // independently of what the offset is. - Value *Val = MSI->getValue(); - if (LoadSize != 1) - Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8)); - - Value *OneElt = Val; - - // Splat the value out to the right number of bits. - for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) { - // If we can double the number of bytes set, do it. - if (NumBytesSet*2 <= LoadSize) { - Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8); - Val = Builder.CreateOr(Val, ShVal); - NumBytesSet <<= 1; - continue; - } - - // Otherwise insert one byte at a time. - Value *ShVal = Builder.CreateShl(Val, 1*8); - Val = Builder.CreateOr(OneElt, ShVal); - ++NumBytesSet; - } - - return CoerceAvailableValueToLoadType(Val, LoadTy, Builder, DL); - } - - // Otherwise, this is a memcpy/memmove from a constant global. - MemTransferInst *MTI = cast<MemTransferInst>(SrcInst); - Constant *Src = cast<Constant>(MTI->getSource()); - unsigned AS = Src->getType()->getPointerAddressSpace(); - - // Otherwise, see if we can constant fold a load from the constant with the - // offset applied as appropriate. 
- Src = ConstantExpr::getBitCast(Src, - Type::getInt8PtrTy(Src->getContext(), AS)); - Constant *OffsetCst = - ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, - OffsetCst); - Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); - return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL); -} /// Given a set of loads specified by ValuesPerBlock, @@ -1184,11 +750,17 @@ if (Load->getType() == LoadTy && Offset == 0) { Res = Load; } else { - Res = GetLoadValueForLoad(Load, Offset, LoadTy, InsertPt, gvn); - + Res = GetLoadValueForLoad(Load, Offset, LoadTy, InsertPt); + // We would like to use gvn.markInstructionForDeletion here, but we can't + // because the load is already memoized into the leader map table that GVN + // tracks. It is potentially possible to remove the load from the table, + // but then there all of the operations based on it would need to be + // rehashed. Just leave the dead load around. 
+ gvn.getMemDep().removeInstruction(Load); DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " << *getCoercedLoadValue() << '\n' - << *Res << '\n' << "\n\n\n"); + << *Res << '\n' + << "\n\n\n"); } } else if (isMemIntrinValue()) { Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy, Index: lib/Transforms/Utils/CMakeLists.txt =================================================================== --- lib/Transforms/Utils/CMakeLists.txt +++ lib/Transforms/Utils/CMakeLists.txt @@ -54,7 +54,8 @@ UnifyFunctionExitNodes.cpp Utils.cpp ValueMapper.cpp - + VNCoercion.cpp + ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/Utils Index: lib/Transforms/Utils/VNCoercion.cpp =================================================================== --- /dev/null +++ lib/Transforms/Utils/VNCoercion.cpp @@ -0,0 +1,439 @@ +#include "llvm/Transforms/Utils/VNCoercion.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "vncoerce" +namespace llvm { +namespace VNCoercion { + +/// Return true if CoerceAvailableValueToLoadType will succeed. +bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, + const DataLayout &DL) { + // If the loaded or stored value is an first class array or struct, don't try + // to transform them. We need to be able to bitcast to integer. + if (LoadTy->isStructTy() || LoadTy->isArrayTy() || + StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy()) + return false; + + // The store has to be at least as big as the load. 
+ if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy)) + return false; + + return true; +} + +/// If we saw a store of a value to memory, and +/// then a load from a must-aliased pointer of a different type, try to coerce +/// the stored value. LoadedTy is the type of the load we want to replace. +/// IRB is IRBuilder used to insert new instructions. +/// +/// If we can't do it, return null. +Value *CoerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy, + IRBuilder<> &IRB, const DataLayout &DL) { + assert(CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) && + "precondition violation - materialization can't fail"); + + if (auto *C = dyn_cast<Constant>(StoredVal)) + if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL)) + StoredVal = FoldedStoredVal; + + // If this is already the right type, just return it. + Type *StoredValTy = StoredVal->getType(); + + uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy); + uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy); + + // If the store and reload are the same size, we can always reuse it. + if (StoredValSize == LoadedValSize) { + // Pointer to Pointer -> use bitcast. + if (StoredValTy->getScalarType()->isPointerTy() && + LoadedTy->getScalarType()->isPointerTy()) { + StoredVal = IRB.CreateBitCast(StoredVal, LoadedTy); + } else { + // Convert source pointers to integers, which can be bitcast. + if (StoredValTy->getScalarType()->isPointerTy()) { + StoredValTy = DL.getIntPtrType(StoredValTy); + StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy); + } + + Type *TypeToCastTo = LoadedTy; + if (TypeToCastTo->getScalarType()->isPointerTy()) + TypeToCastTo = DL.getIntPtrType(TypeToCastTo); + + if (StoredValTy != TypeToCastTo) + StoredVal = IRB.CreateBitCast(StoredVal, TypeToCastTo); + + // Cast to pointer if the load needs a pointer type. 
+ if (LoadedTy->getScalarType()->isPointerTy()) + StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy); + } + + if (auto *C = dyn_cast<Constant>(StoredVal)) + if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL)) + StoredVal = FoldedStoredVal; + + return StoredVal; + } + + // If the loaded value is smaller than the available value, then we can + // extract out a piece from it. If the available value is too small, then we + // can't do anything. + assert(StoredValSize >= LoadedValSize && + "CanCoerceMustAliasedValueToLoad fail"); + + // Convert source pointers to integers, which can be manipulated. + if (StoredValTy->getScalarType()->isPointerTy()) { + StoredValTy = DL.getIntPtrType(StoredValTy); + StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy); + } + + // Convert vectors and fp to integer, which can be manipulated. + if (!StoredValTy->isIntegerTy()) { + StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize); + StoredVal = IRB.CreateBitCast(StoredVal, StoredValTy); + } + + // If this is a big-endian system, we need to shift the value down to the low + // bits so that a truncate will work. + if (DL.isBigEndian()) { + uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) - + DL.getTypeStoreSizeInBits(LoadedTy); + StoredVal = IRB.CreateLShr(StoredVal, ShiftAmt, "tmp"); + } + + // Truncate the integer to the right size now. + Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize); + StoredVal = IRB.CreateTrunc(StoredVal, NewIntTy, "trunc"); + + if (LoadedTy != NewIntTy) { + // If the result is a pointer, inttoptr. + if (LoadedTy->getScalarType()->isPointerTy()) + StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy, "inttoptr"); + else + // Otherwise, bitcast. 
+ StoredVal = IRB.CreateBitCast(StoredVal, LoadedTy, "bitcast"); + } + + if (auto *C = dyn_cast<Constant>(StoredVal)) + if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL)) + StoredVal = FoldedStoredVal; + + return StoredVal; +} + +/// This function is called when we have a +/// memdep query of a load that ends up being a clobbering memory write (store, +/// memset, memcpy, memmove). This means that the write *may* provide bits used +/// by the load but we can't be sure because the pointers don't mustalias. +/// +/// Check this case to see if there is anything more we can do before we give +/// up. This returns -1 if we have to give up, or a byte number in the stored +/// value of the piece that feeds the load. +static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, + Value *WritePtr, uint64_t WriteSizeInBits, + const DataLayout &DL) { + // If the loaded or stored value is a first class array or struct, don't try + // to transform them. We need to be able to bitcast to integer. + if (LoadTy->isStructTy() || LoadTy->isArrayTy()) + return -1; + + int64_t StoreOffset = 0, LoadOffset = 0; + Value *StoreBase = + GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL); + Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL); + if (StoreBase != LoadBase) + return -1; + + // If the load and store are to the exact same address, they should have been + // a must alias. AA must have gotten confused. + // FIXME: Study to see if/when this happens. One case is forwarding a memset + // to a load from the base of the memset. + + // If the load and store don't overlap at all, the store doesn't provide + // anything to the load. In this case, they really don't alias at all, AA + // must have gotten confused. + uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy); + + if ((WriteSizeInBits & 7) | (LoadSize & 7)) + return -1; + uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes. 
+ LoadSize /= 8; + + bool isAAFailure = false; + if (StoreOffset < LoadOffset) + isAAFailure = StoreOffset + int64_t(StoreSize) <= LoadOffset; + else + isAAFailure = LoadOffset + int64_t(LoadSize) <= StoreOffset; + + if (isAAFailure) + return -1; + + // If the Load isn't completely contained within the stored bits, we don't + // have all the bits to feed it. We could do something crazy in the future + // (issue a smaller load then merge the bits in) but this seems unlikely to be + // valuable. + if (StoreOffset > LoadOffset || + StoreOffset + StoreSize < LoadOffset + LoadSize) + return -1; + + // Okay, we can do this transformation. Return the number of bytes into the + // store that the load is. + return LoadOffset - StoreOffset; +} + +/// This function is called when we have a +/// memdep query of a load that ends up being a clobbering store. +int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, + StoreInst *DepSI) { + // Cannot handle reading from store of first-class aggregate yet. + if (DepSI->getValueOperand()->getType()->isStructTy() || + DepSI->getValueOperand()->getType()->isArrayTy()) + return -1; + + const DataLayout &DL = DepSI->getModule()->getDataLayout(); + Value *StorePtr = DepSI->getPointerOperand(); + uint64_t StoreSize = + DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()); + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize, + DL); +} + +/// This function is called when we have a +/// memdep query of a load that ends up being clobbered by another load. See if +/// the other load can feed into the second load. +int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI, + const DataLayout &DL) { + // Cannot handle reading from store of first-class aggregate yet. 
+ if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) + return -1; + + Value *DepPtr = DepLI->getPointerOperand(); + uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()); + int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL); + if (R != -1) + return R; + + // If we have a load/load clobber and DepLI can be widened to cover this load, + // then we should widen it! + int64_t LoadOffs = 0; + const Value *LoadBase = + GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL); + unsigned LoadSize = DL.getTypeStoreSize(LoadTy); + + unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize( + LoadBase, LoadOffs, LoadSize, DepLI); + if (Size == 0) + return -1; + + // Check non-obvious conditions enforced by MDA which we rely on for being + // able to materialize this potentially available value + assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!"); + assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load"); + + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8, DL); +} + +int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, + MemIntrinsic *MI, const DataLayout &DL) { + // If the mem operation is a non-constant size, we can't handle it. + ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength()); + if (!SizeCst) + return -1; + uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8; + + // If this is memset, we just need to see if the offset is valid in the size + // of the memset.. + if (MI->getIntrinsicID() == Intrinsic::memset) + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), + MemSizeInBits, DL); + + // If we have a memcpy/memmove, the only case we can handle is if this is a + // copy from constant memory. In that case, we can read directly from the + // constant memory. 
+ MemTransferInst *MTI = cast(MI); + + Constant *Src = dyn_cast(MTI->getSource()); + if (!Src) + return -1; + + GlobalVariable *GV = dyn_cast(GetUnderlyingObject(Src, DL)); + if (!GV || !GV->isConstant()) + return -1; + + // See if the access is within the bounds of the transfer. + int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), + MemSizeInBits, DL); + if (Offset == -1) + return Offset; + + unsigned AS = Src->getType()->getPointerAddressSpace(); + // Otherwise, see if we can constant fold a load from the constant with the + // offset applied as appropriate. + Src = + ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS)); + Constant *OffsetCst = + ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); + Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, + OffsetCst); + Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); + if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL)) + return Offset; + return -1; +} + +/// This function is called when we have a +/// memdep query of a load that ends up being a clobbering store. This means +/// that the store provides bits used by the load but we the pointers don't +/// mustalias. Check this case to see if there is anything more we can do +/// before we give up. +Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy, + Instruction *InsertPt, const DataLayout &DL) { + LLVMContext &Ctx = SrcVal->getType()->getContext(); + + uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8; + uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8; + + IRBuilder<> Builder(InsertPt); + + // Compute which bits of the stored value are being used by the load. Convert + // to an integer type to start with. 
+ if (SrcVal->getType()->getScalarType()->isPointerTy()) + SrcVal = + Builder.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType())); + if (!SrcVal->getType()->isIntegerTy()) + SrcVal = + Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8)); + + // Shift the bits to the least significant depending on endianness. + unsigned ShiftAmt; + if (DL.isLittleEndian()) + ShiftAmt = Offset * 8; + else + ShiftAmt = (StoreSize - LoadSize - Offset) * 8; + + if (ShiftAmt) + SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt); + + if (LoadSize != StoreSize) + SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize * 8)); + + return CoerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL); +} + +/// This function is called when we have a +/// memdep query of a load that ends up being a clobbering load. This means +/// that the load *may* provide bits used by the load but we can't be sure +/// because the pointers don't mustalias. Check this case to see if there is +/// anything more we can do before we give up. +Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy, + Instruction *InsertPt) { + + const DataLayout &DL = SrcVal->getModule()->getDataLayout(); + // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to + // widen SrcVal out to a larger load. + unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType()); + unsigned LoadSize = DL.getTypeStoreSize(LoadTy); + if (Offset + LoadSize > SrcValStoreSize) { + assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!"); + assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load"); + // If we have a load/load clobber an DepLI can be widened to cover this + // load, then we should widen it to the next power of 2 size big enough! 
+ unsigned NewLoadSize = Offset + LoadSize; + if (!isPowerOf2_32(NewLoadSize)) + NewLoadSize = NextPowerOf2(NewLoadSize); + + Value *PtrVal = SrcVal->getPointerOperand(); + + // Insert the new load after the old load. This ensures that subsequent + // memdep queries will find the new load. We can't easily remove the old + // load completely because it is already in the value numbering table. + IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); + Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8); + DestPTy = + PointerType::get(DestPTy, PtrVal->getType()->getPointerAddressSpace()); + Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc()); + PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); + LoadInst *NewLoad = Builder.CreateLoad(PtrVal); + NewLoad->takeName(SrcVal); + NewLoad->setAlignment(SrcVal->getAlignment()); + + DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); + DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); + + // Replace uses of the original load with the wider load. On a big endian + // system, we need to shift down to get the relevant bits. + Value *RV = NewLoad; + if (DL.isBigEndian()) + RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8); + RV = Builder.CreateTrunc(RV, SrcVal->getType()); + SrcVal->replaceAllUsesWith(RV); + + SrcVal = NewLoad; + } + + return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL); +} + +/// This function is called when we have a +/// memdep query of a load that ends up being a clobbering mem intrinsic. +Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, + Type *LoadTy, Instruction *InsertPt, + const DataLayout &DL) { + LLVMContext &Ctx = LoadTy->getContext(); + uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy) / 8; + + IRBuilder<> Builder(InsertPt); + + // We know that this method is only called when the mem transfer fully + // provides the bits for the load. 
+ if (MemSetInst *MSI = dyn_cast(SrcInst)) { + // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and + // independently of what the offset is. + Value *Val = MSI->getValue(); + if (LoadSize != 1) + Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize * 8)); + + Value *OneElt = Val; + + // Splat the value out to the right number of bits. + for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) { + // If we can double the number of bytes set, do it. + if (NumBytesSet * 2 <= LoadSize) { + Value *ShVal = Builder.CreateShl(Val, NumBytesSet * 8); + Val = Builder.CreateOr(Val, ShVal); + NumBytesSet <<= 1; + continue; + } + + // Otherwise insert one byte at a time. + Value *ShVal = Builder.CreateShl(Val, 1 * 8); + Val = Builder.CreateOr(OneElt, ShVal); + ++NumBytesSet; + } + + return CoerceAvailableValueToLoadType(Val, LoadTy, Builder, DL); + } + + // Otherwise, this is a memcpy/memmove from a constant global. + MemTransferInst *MTI = cast(SrcInst); + Constant *Src = cast(MTI->getSource()); + unsigned AS = Src->getType()->getPointerAddressSpace(); + + // Otherwise, see if we can constant fold a load from the constant with the + // offset applied as appropriate. + Src = + ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS)); + Constant *OffsetCst = + ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); + Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, + OffsetCst); + Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); + return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL); +} +} // namespace VNCoercion +} // namespace llvm