Index: include/llvm/Analysis/Loads.h
===================================================================
--- include/llvm/Analysis/Loads.h
+++ include/llvm/Analysis/Loads.h
@@ -78,12 +78,15 @@
 /// If AATags is non-null and a load or store is found, the AA tags from the
 /// load or store are recorded there. If there are no AA tags or if no access
 /// is found, it is left unmodified.
+/// If \c PartialType is non-null and a load or store is found, we return the
+/// value of the load/store if its type is the same as \c PartialType. This
+/// enables partial load/store forwarding.
 Value *FindAvailableLoadedValue(LoadInst *Load,
                                 BasicBlock *ScanBB,
                                 BasicBlock::iterator &ScanFrom,
                                 unsigned MaxInstsToScan = DefMaxInstsToScan,
                                 AliasAnalysis *AA = nullptr,
-                                AAMDNodes *AATags = nullptr);
-
+                                AAMDNodes *AATags = nullptr,
+                                Type *PartialType = nullptr);
 }

 #endif
Index: lib/Analysis/Loads.cpp
===================================================================
--- lib/Analysis/Loads.cpp
+++ lib/Analysis/Loads.cpp
@@ -319,10 +319,14 @@
 /// If \c AATags is non-null and a load or store is found, the AA tags from the
 /// load or store are recorded there. If there are no AA tags or if no access is
 /// found, it is left unmodified.
+/// If \c PartialType is non-null and a load or store is found, we return the
+/// value of the load/store if its type is the same as \c PartialType. This
+/// enables partial load/store forwarding.
 Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
                                       BasicBlock::iterator &ScanFrom,
                                       unsigned MaxInstsToScan,
-                                      AliasAnalysis *AA, AAMDNodes *AATags) {
+                                      AliasAnalysis *AA, AAMDNodes *AATags,
+                                      Type *PartialType) {
   if (MaxInstsToScan == 0)
     MaxInstsToScan = ~0U;

@@ -362,9 +366,9 @@
     // If this is a load of Ptr, the loaded value is available.
     // (This is true even if the load is volatile or atomic, although
     // those cases are unlikely.)
-    if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
-      if (AreEquivalentAddressValues(
-              LI->getPointerOperand()->stripPointerCasts(), StrippedPtr) &&
+    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+      Value *LoadPtr = LI->getPointerOperand()->stripPointerCasts();
+      if (AreEquivalentAddressValues(LoadPtr, StrippedPtr) &&
           CastInst::isBitOrNoopPointerCastable(LI->getType(), AccessTy, DL)) {

         // We can value forward from an atomic to a non-atomic, but not the
@@ -377,6 +381,22 @@
         return LI;
       }

+      // If partial value forwarding is enabled (PartialType is non-null) and
+      // LI reads a part of the bits (given by PartialType) used by Load,
+      // return the value for those bits. The PartialType comes from trunc
+      // instructions that use only a part of Load.
+      if (PartialType && LoadPtr == StrippedPtr &&
+          CastInst::isBitOrNoopPointerCastable(LI->getType(), PartialType,
+                                               DL)) {
+        // We can value forward from an atomic to a non-atomic, but not the
+        // other way around.
+        if (LI->isAtomic() < Load->isAtomic())
+          return nullptr;
+        return LI;
+      }
+    }
+
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
       // If this is a store through Ptr, the value is available!
@@ -409,6 +429,19 @@
       if (AA && (AA->getModRefInfo(SI, StrippedPtr, AccessSize) &
                  MRI_Mod) == 0)
         continue;

+      // If partial value forwarding is enabled (PartialType is non-null) and
+      // the store writes a part of the bits (given by PartialType) used by
+      // the load, return the value for those bits. The PartialType comes
+      // from trunc instructions that use only a part of Load.
+      if (PartialType && StorePtr == StrippedPtr &&
+          CastInst::isBitOrNoopPointerCastable(SI->getOperand(0)->getType(),
+                                               PartialType, DL)) {
+        // We can value forward from an atomic to a non-atomic, but not the
+        // other way around.
+        if (SI->isAtomic() < Load->isAtomic())
+          return nullptr;
+        return SI->getOperand(0);
+      }
+
       // Otherwise the store may or may not alias the pointer; bail out.
       ++ScanFrom;
       return nullptr;
Index: lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -14,9 +14,10 @@
 #include "InstCombineInternal.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/PatternMatch.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"

 using namespace llvm;
 using namespace PatternMatch;
@@ -576,6 +577,19 @@
   if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL))
     return I;

+  // When the trunc operand is a widened load, see if we can get the value
+  // from a previous store or load.
+  if (auto *LI = dyn_cast<LoadInst>(Src)) {
+    BasicBlock::iterator BBI(*LI);
+    // Scan a few instructions up from LI; if we find a partial load/store of
+    // type DestTy that feeds into LI, we can replace all uses of the trunc
+    // with the load/store value.
+    if (Value *AvailableVal = FindAvailableLoadedValue(
+            LI, LI->getParent(), BBI, DefMaxInstsToScan,
+            /* AA */ nullptr, /* AATags */ nullptr, DestTy))
+      return replaceInstUsesWith(CI, AvailableVal);
+  }
+
   return nullptr;
 }
Index: test/Transforms/InstCombine/trunc.ll
===================================================================
--- test/Transforms/InstCombine/trunc.ll
+++ test/Transforms/InstCombine/trunc.ll
@@ -160,3 +160,46 @@
 ; CHECK-NEXT: ret i32 %ext
 }

+; The trunc can be replaced with the store value.
+declare void @consume(i8) readonly
+define i1 @trunc_load_store(i8* align 2 %a) {
+  store i8 0, i8* %a, align 2
+  %bca = bitcast i8* %a to i16*
+  %wide.load = load i16, i16* %bca, align 2
+  %lowhalf.1 = trunc i16 %wide.load to i8
+  call void @consume(i8 %lowhalf.1)
+  %cmp.2 = icmp ult i16 %wide.load, 256
+  ret i1 %cmp.2
+; CHECK-LABEL: @trunc_load_store
+; CHECK-NOT: trunc
+; CHECK: call void @consume(i8 0)
+}
+
+; The trunc can be replaced with the earlier load's value.
+define i1 @trunc_load_load(i8* align 2 %a) {
+  %pload = load i8, i8* %a, align 2
+  %bca = bitcast i8* %a to i16*
+  %wide.load = load i16, i16* %bca, align 2
+  %lowhalf = trunc i16 %wide.load to i8
+  call void @consume(i8 %lowhalf)
+  call void @consume(i8 %pload)
+  %cmp.2 = icmp ult i16 %wide.load, 256
+  ret i1 %cmp.2
+; CHECK-LABEL: @trunc_load_load
+; CHECK-NOT: trunc
+}
+
+; The trunc cannot be replaced: the store size differs from the trunc result size.
+define i1 @trunc_different_size_load(i16* align 2 %a) {
+  store i16 0, i16* %a, align 2
+  %bca = bitcast i16* %a to i32*
+  %wide.load = load i32, i32* %bca, align 2
+  %lowhalf = trunc i32 %wide.load to i8
+  call void @consume(i8 %lowhalf)
+  %cmp.2 = icmp ult i32 %wide.load, 256
+  ret i1 %cmp.2
+; CHECK-LABEL: @trunc_different_size_load
+; CHECK: %lowhalf = trunc i32 %wide.load to i8
+}
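
Usage note (reviewer illustration, not part of the patch): a minimal sketch of
how a caller drives the new PartialType parameter, mirroring the visitTrunc
change above. The helper name forwardPartialValue is hypothetical; the only
API assumed is the FindAvailableLoadedValue signature this patch adds.

  // Hypothetical helper (illustration only): given a trunc whose operand is a
  // widened load, look upward for an earlier load of, or store of a value
  // with, the truncated type through the same pointer.
  #include "llvm/Analysis/Loads.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  static Value *forwardPartialValue(TruncInst &Trunc) {
    auto *LI = dyn_cast<LoadInst>(Trunc.getOperand(0));
    if (!LI)
      return nullptr;
    BasicBlock::iterator BBI(*LI);
    // Passing Trunc.getType() as PartialType restricts forwarding to values
    // that are bit- or noop-pointer-castable to the trunc's result type.
    return FindAvailableLoadedValue(LI, LI->getParent(), BBI,
                                    DefMaxInstsToScan,
                                    /* AA */ nullptr, /* AATags */ nullptr,
                                    /* PartialType */ Trunc.getType());
  }

If a value comes back, every use of the trunc can be rewritten to it, as the
InstCombineCasts.cpp hunk does with replaceInstUsesWith(CI, AvailableVal).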