Index: include/llvm/Analysis/Loads.h
===================================================================
--- include/llvm/Analysis/Loads.h
+++ include/llvm/Analysis/Loads.h
@@ -61,6 +61,19 @@
 /// to scan in the block, used by FindAvailableLoadedValue().
 extern cl::opt<unsigned> DefMaxInstsToScan;
 
+/// FindAvailableMemoryContents - Return the value if the partial or full memory
+/// contents of the Load are available. The partial or full type is specified
+/// through AccessTy. The sanity checks necessary for any transformation done
+/// based on the available memory value should be done by the callers of this
+/// function.
+Value *FindAvailableMemoryContents(LoadInst *Load, Type *AccessTy,
+                                   BasicBlock *BB,
+                                   BasicBlock::iterator &ScanFrom,
+                                   unsigned MaxInstsToScan,
+                                   AliasAnalysis *AA = nullptr,
+                                   AAMDNodes *AATags = nullptr,
+                                   bool *IsLoadCSE = nullptr);
+
 /// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at
 /// the instruction before ScanFrom) checking to see if we have the value at
 /// the memory address *Ptr locally available within a small number of
Index: lib/Analysis/Loads.cpp
===================================================================
--- lib/Analysis/Loads.cpp
+++ lib/Analysis/Loads.cpp
@@ -300,44 +300,22 @@
              "to scan backward from a given instruction, when searching for "
              "available loaded value"));
 
-/// \brief Scan the ScanBB block backwards to see if we have the value at the
-/// memory address *Ptr locally available within a small number of instructions.
-///
-/// The scan starts from \c ScanFrom. \c MaxInstsToScan specifies the maximum
-/// instructions to scan in the block. If it is set to \c 0, it will scan the whole
-/// block.
-///
-/// If the value is available, this function returns it. If not, it returns the
-/// iterator for the last validated instruction that the value would be live
-/// through. If we scanned the entire block and didn't find something that
-/// invalidates \c *Ptr or provides it, \c ScanFrom is left at the last
-/// instruction processed and this returns null.
-///
-/// You can also optionally specify an alias analysis implementation, which
-/// makes this more precise.
-///
-/// If \c AATags is non-null and a load or store is found, the AA tags from the
-/// load or store are recorded there. If there are no AA tags or if no access is
-/// found, it is left unmodified.
-Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
-                                      BasicBlock::iterator &ScanFrom,
-                                      unsigned MaxInstsToScan,
-                                      AliasAnalysis *AA, AAMDNodes *AATags,
-                                      bool *IsLoadCSE) {
+/// \brief Return the value if the partial or full memory
+/// contents of the Load are available. The partial or full type is specified
+/// through AccessTy. The sanity checks necessary for any transformation done
+/// based on the available memory value should be done by the callers of this
+/// function.
+Value *llvm::FindAvailableMemoryContents(LoadInst *Load, Type *AccessTy,
+                                         BasicBlock *ScanBB,
+                                         BasicBlock::iterator &ScanFrom,
+                                         unsigned MaxInstsToScan,
+                                         AliasAnalysis *AA, AAMDNodes *AATags,
+                                         bool *IsLoadCSE) {
+
   if (MaxInstsToScan == 0)
     MaxInstsToScan = ~0U;
 
   Value *Ptr = Load->getPointerOperand();
-  Type *AccessTy = Load->getType();
-
-  // We can never remove a volatile load
-  if (Load->isVolatile())
-    return nullptr;
-
-  // Anything stronger than unordered is currently unimplemented.
-  if (!Load->isUnordered())
-    return nullptr;
-
   const DataLayout &DL = ScanBB->getModule()->getDataLayout();
 
   // Try to get the store size for the type.
@@ -363,9 +341,9 @@
     // If this is a load of Ptr, the loaded value is available.
     // (This is true even if the load is volatile or atomic, although
    // those cases are unlikely.)
-    if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
-      if (AreEquivalentAddressValues(
-              LI->getPointerOperand()->stripPointerCasts(), StrippedPtr) &&
+    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+      Value *LoadPtr = LI->getPointerOperand()->stripPointerCasts();
+      if (AreEquivalentAddressValues(LoadPtr, StrippedPtr) &&
           CastInst::isBitOrNoopPointerCastable(LI->getType(), AccessTy, DL)) {
 
         // We can value forward from an atomic to a non-atomic, but not the
@@ -380,6 +358,8 @@
         return LI;
       }
 
+    }
+
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
       // If this is a store through Ptr, the value is available!
@@ -434,4 +414,48 @@
   // Got to the start of the block, we didn't find it, but are done for this
   // block.
   return nullptr;
+
+}
+
+/// \brief Scan the ScanBB block backwards to see if we have the value at the
+/// memory address *Ptr locally available within a small number of instructions.
+///
+/// The scan starts from \c ScanFrom. \c MaxInstsToScan specifies the maximum
+/// instructions to scan in the block. If it is set to \c 0, it will scan the whole
+/// block.
+///
+/// If the value is available, this function returns it. If not, it returns the
+/// iterator for the last validated instruction that the value would be live
+/// through. If we scanned the entire block and didn't find something that
+/// invalidates \c *Ptr or provides it, \c ScanFrom is left at the last
+/// instruction processed and this returns null.
+///
+/// You can also optionally specify an alias analysis implementation, which
+/// makes this more precise.
+///
+/// If \c AATags is non-null and a load or store is found, the AA tags from the
+/// load or store are recorded there. If there are no AA tags or if no access is
+/// found, it is left unmodified.
+Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
+                                      BasicBlock::iterator &ScanFrom,
+                                      unsigned MaxInstsToScan,
+                                      AliasAnalysis *AA, AAMDNodes *AATags,
+                                      bool *IsLoadCSE) {
+
+  // We can never remove a volatile load
+  if (Load->isVolatile())
+    return nullptr;
+
+  // Anything stronger than unordered is currently unimplemented.
+  if (!Load->isUnordered())
+    return nullptr;
+
+  Type *AccessTy = Load->getType();
+
+  // All the logic resides in FindAvailableMemoryContents, which returns the
+  // partial or complete load value depending on AccessTy.
+  // FindAvailableLoadedValue is a wrapper for callers that want the complete
+  // value at the memory address accessed by the load.
+  return FindAvailableMemoryContents(Load, AccessTy, ScanBB, ScanFrom,
+                                     MaxInstsToScan, AA, AATags, IsLoadCSE);
 }
Index: lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -14,9 +14,10 @@
 #include "InstCombineInternal.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/PatternMatch.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 
 using namespace llvm;
 using namespace PatternMatch;
@@ -576,6 +577,17 @@
 
   if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL))
     return I;
 
+  // When the trunc operand is a widened load, see if we can get the value from
+  // a previous store/load.
+  if (auto *LI = dyn_cast<LoadInst>(Src)) {
+    BasicBlock::iterator BBI(*LI);
+    // Scan a few instructions up from LI; if we find a partial load/store of
+    // type DestTy that feeds into LI, we can replace all uses of the trunc
+    // with the load/store value.
+    if (Value *AvailableVal = FindAvailableMemoryContents(
+            LI, DestTy, LI->getParent(), BBI, DefMaxInstsToScan))
+      return replaceInstUsesWith(CI, AvailableVal);
+  }
   return nullptr;
 }
Index: test/Transforms/InstCombine/trunc.ll
===================================================================
--- test/Transforms/InstCombine/trunc.ll
+++ test/Transforms/InstCombine/trunc.ll
@@ -181,3 +181,46 @@
 bb2:
   unreachable
 }
+
+declare void @consume(i8) readonly
+define i1 @trunc_load_store(i8* align 2 %a) {
+  store i8 0, i8* %a, align 2
+  %bca = bitcast i8* %a to i16*
+  %wide.load = load i16, i16* %bca, align 2
+  %lowhalf.1 = trunc i16 %wide.load to i8
+  call void @consume(i8 %lowhalf.1)
+  %cmp.2 = icmp ult i16 %wide.load, 256
+  ret i1 %cmp.2
+; CHECK-LABEL: @trunc_load_store
+; CHECK-NOT: trunc
+; CHECK: call void @consume(i8 0)
+}
+
+
+; The trunc can be replaced with the load value.
+define i1 @trunc_load_load(i8* align 2 %a) {
+  %pload = load i8, i8* %a, align 2
+  %bca = bitcast i8* %a to i16*
+  %wide.load = load i16, i16* %bca, align 2
+  %lowhalf = trunc i16 %wide.load to i8
+  call void @consume(i8 %lowhalf)
+  call void @consume(i8 %pload)
+  %cmp.2 = icmp ult i16 %wide.load, 256
+  ret i1 %cmp.2
+; CHECK-LABEL: @trunc_load_load
+; CHECK-NOT: trunc
+}
+
+
+; The trunc cannot be replaced since the store size is not the trunc result size.
+define i1 @trunc_different_size_load(i16* align 2 %a) {
+  store i16 0, i16* %a, align 2
+  %bca = bitcast i16* %a to i32*
+  %wide.load = load i32, i32* %bca, align 2
+  %lowhalf = trunc i32 %wide.load to i8
+  call void @consume(i8 %lowhalf)
+  %cmp.2 = icmp ult i32 %wide.load, 256
+  ret i1 %cmp.2
+; CHECK-LABEL: @trunc_different_size_load
+; CHECK: %lowhalf = trunc i32 %wide.load to i8
+}
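
Usage note (illustrative only, not part of the patch): the sketch below shows how another caller might use the new FindAvailableMemoryContents API, mirroring the InstCombineCasts hunk above. It assumes only the declarations added to llvm/Analysis/Loads.h in this patch; the helper name narrowTruncOfWideLoad is hypothetical.

// A minimal sketch, assuming the FindAvailableMemoryContents declaration and
// the exported DefMaxInstsToScan option added by this patch. The helper name
// narrowTruncOfWideLoad is hypothetical.
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static Value *narrowTruncOfWideLoad(TruncInst &Trunc, AliasAnalysis *AA) {
  // Only handle a trunc whose operand is a load.
  auto *LI = dyn_cast<LoadInst>(Trunc.getOperand(0));
  if (!LI)
    return nullptr;

  // Scan backwards from the wide load for an earlier store or load that
  // already provides the value at the same address in the truncated
  // (destination) type; returns null if nothing suitable is found. Any
  // legality checks for the rewrite remain the caller's responsibility.
  BasicBlock::iterator BBI(*LI);
  return FindAvailableMemoryContents(LI, Trunc.getDestTy(), LI->getParent(),
                                     BBI, DefMaxInstsToScan, AA);
}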