Index: include/llvm/Analysis/Loads.h
===================================================================
--- include/llvm/Analysis/Loads.h
+++ include/llvm/Analysis/Loads.h
@@ -78,12 +78,15 @@
 /// If AATags is non-null and a load or store is found, the AA tags from the
 /// load or store are recorded there. If there are no AA tags or if no access
 /// is found, it is left unmodified.
+/// If \c PartialType is non-null and a load or store is found, we return the
+/// value of the load/store if its type is the same as \c PartialType. This
+/// enables partial load/store forwarding.
 Value *FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
                                 BasicBlock::iterator &ScanFrom,
                                 unsigned MaxInstsToScan = DefMaxInstsToScan,
                                 AliasAnalysis *AA = nullptr,
-                                AAMDNodes *AATags = nullptr);
-
+                                AAMDNodes *AATags = nullptr,
+                                Type *PartialType = nullptr);
 }

 #endif
Index: lib/Analysis/Loads.cpp
===================================================================
--- lib/Analysis/Loads.cpp
+++ lib/Analysis/Loads.cpp
@@ -319,10 +319,14 @@
 /// If \c AATags is non-null and a load or store is found, the AA tags from the
 /// load or store are recorded there. If there are no AA tags or if no access is
 /// found, it is left unmodified.
+/// If \c PartialType is non-null and a load or store is found, we return the
+/// value of the load/store if its type is the same as \c PartialType. This
+/// enables partial load/store forwarding.
 Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
                                       BasicBlock::iterator &ScanFrom,
                                       unsigned MaxInstsToScan,
-                                      AliasAnalysis *AA, AAMDNodes *AATags) {
+                                      AliasAnalysis *AA, AAMDNodes *AATags,
+                                      Type *PartialType) {
   if (MaxInstsToScan == 0)
     MaxInstsToScan = ~0U;
@@ -362,9 +366,9 @@
     // If this is a load of Ptr, the loaded value is available.
     // (This is true even if the load is volatile or atomic, although
     // those cases are unlikely.)
-    if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
-      if (AreEquivalentAddressValues(
-              LI->getPointerOperand()->stripPointerCasts(), StrippedPtr) &&
+    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+      Value *LoadPtr = LI->getPointerOperand()->stripPointerCasts();
+      if (AreEquivalentAddressValues(LoadPtr, StrippedPtr) &&
           CastInst::isBitOrNoopPointerCastable(LI->getType(), AccessTy, DL)) {
         // We can value forward from an atomic to a non-atomic, but not the
@@ -377,6 +381,14 @@
         return LI;
       }

+      // If we support partial value forwarding to the load (PartialType is
+      // non-null) and LI loads exactly the part of Load's bits that
+      // PartialType identifies, return the value for those bits. PartialType
+      // comes from trunc instructions that use only a part of Load.
+      if (LoadPtr == StrippedPtr && LI->getType() == PartialType)
+        return LI;
+    }
+
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
       // If this is a store through Ptr, the value is available!
@@ -409,6 +421,13 @@
       if (AA && (AA->getModRefInfo(SI, StrippedPtr, AccessSize) & MRI_Mod) == 0)
         continue;

+      // If we support partial value forwarding to the load (PartialType is
+      // non-null) and the store writes exactly the part of Load's bits that
+      // PartialType identifies, return the stored value for those bits.
+      // PartialType comes from trunc instructions that use only a part of Load.
+      if (PartialType && StorePtr == StrippedPtr &&
+          SI->getOperand(0)->getType() == PartialType)
+        return SI->getOperand(0);
+
       // Otherwise the store that may or may not alias the pointer, bail out.
       ++ScanFrom;
       return nullptr;
Index: lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -14,9 +14,10 @@
 #include "InstCombineInternal.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/PatternMatch.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 using namespace llvm;
 using namespace PatternMatch;
@@ -576,6 +577,19 @@
   if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL))
     return I;

+  // When the trunc operand is a widened load, see if we can get the value
+  // from a previous store or load.
+  if (auto *LI = dyn_cast<LoadInst>(Src)) {
+    BasicBlock::iterator BBI(*LI);
+    // Scan a few instructions up from LI; if we find a partial load/store of
+    // type DestTy that feeds into LI, we can replace all uses of the trunc
+    // with the load/store value.
+    if (Value *AvailableVal = FindAvailableLoadedValue(
+            LI, LI->getParent(), BBI, DefMaxInstsToScan,
+            /* AA */ nullptr, /* AATags */ nullptr, DestTy))
+      return replaceInstUsesWith(CI, AvailableVal);
+  }
+
   return nullptr;
 }
Index: test/Transforms/InstCombine/trunc.ll
===================================================================
--- test/Transforms/InstCombine/trunc.ll
+++ test/Transforms/InstCombine/trunc.ll
@@ -160,3 +160,17 @@
 ; CHECK-NEXT: ret i32 %ext
 }

+; The trunc can be replaced with the store value.
+declare void @consume(i8) readonly
+define i1 @trunc_load(i8* align 2 %a) {
+  store i8 0, i8* %a, align 2
+  %bca = bitcast i8* %a to i16*
+  %wide.load = load i16, i16* %bca, align 2
+  %lowhalf.1 = trunc i16 %wide.load to i8
+  call void @consume(i8 %lowhalf.1)
+  %cmp.2 = icmp ult i16 %wide.load, 256
+  ret i1 %cmp.2
+; CHECK-LABEL: @trunc_load
+; CHECK-NOT: trunc
+; CHECK: call void @consume(i8 0)
+}
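
The new test exercises only the store-to-load path. For reference, below is a
minimal sketch of the load-to-load path that the LI->getType() == PartialType
check in Loads.cpp enables; the function and value names are hypothetical and
this IR is not part of the patch or its tests:

  define i8 @trunc_load_load(i8* align 2 %a) {
    %narrow = load i8, i8* %a, align 2
    %bca = bitcast i8* %a to i16*
    %wide.load = load i16, i16* %bca, align 2
    ; Scanning up from %wide.load with PartialType = i8 (the trunc's DestTy)
    ; reaches %narrow: both pointers strip back to %a and i8 matches
    ; PartialType, so the trunc below gets replaced with %narrow.
    %lowhalf = trunc i16 %wide.load to i8
    ret i8 %lowhalf
  }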