Index: llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
===================================================================
--- llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LogicCombine.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -852,6 +853,81 @@
   return MadeChange;
 }
 
+/// If C is a constant patterned array and all valid loaded results for the
+/// given alignment are the same constant, replace the load with it.
+static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
+  auto *LI = dyn_cast<LoadInst>(&I);
+  if (!LI || LI->isVolatile())
+    return false;
+
+  // We can only fold the load if it is from a constant global with a
+  // definitive initializer. Skip expensive logic if this is not the case.
+  auto *PtrOp = LI->getPointerOperand();
+  auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(PtrOp));
+  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+    return false;
+
+  Type *LoadTy = LI->getType();
+  Constant *C = GV->getInitializer();
+
+  uint64_t GVSize = DL.getTypeAllocSize(C->getType());
+  // Bail for large initializers in excess of 4K to avoid allocating
+  // too much memory.
+  if (!GVSize || 4096 < GVSize)
+    return false;
+  // Calculate the constant offset and the minimum GEP stride.
+  uint64_t Stride = 1;
+  APInt ConstOffsetAcc(GVSize * 8, 0);
+  if (auto *GEP = dyn_cast<GEPOperator>(PtrOp)) {
+    Stride = DL.getTypeAllocSize(GEP->getSourceElementType());
+    Value *PtrOpV = GEP;
+    // The minimum GEP stride is the greatest common divisor of the scales of
+    // the variable indices of consecutive GEPs (cf. Bézout's identity).
+    while (auto *GEP = dyn_cast<GEPOperator>(PtrOpV)) {
+      unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
+      MapVector<Value *, APInt> VarOffsets;
+      APInt ConstOffset(BW, 0);
+      if (!GEP->collectOffset(DL, BW, VarOffsets, ConstOffset))
+        return false;
+
+      ConstOffsetAcc += APInt(GVSize * 8, ConstOffset.getZExtValue());
+
+      for (auto [_, Scale] : VarOffsets)
+        Stride = APIntOps::GreatestCommonDivisor(
+                     APInt(Scale.getBitWidth(), Stride), Scale)
+                     .getZExtValue();
+      PtrOpV = GEP->getPointerOperand();
+    }
+    ConstOffsetAcc =
+        ConstOffsetAcc.urem(APInt(ConstOffsetAcc.getBitWidth(), Stride));
+  }
+  Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, ConstOffsetAcc, DL);
+  // TODO: Handle pointer-typed loads.
+  if (!Ca)
+    return false;
+
+  // Any reachable offset is a multiple of the minimum GEP stride, and any
+  // valid offset is also a multiple of the load alignment, so checking only
+  // multiples of the larger of the two is sufficient to prove that all
+  // results are equal. The load alignment can only be used when the global
+  // variable's alignment is at least as large; otherwise the load's offset
+  // within the global is not constrained by it.
+  if (LI->getAlign().value() <= GV->getAlign().valueOrOne().value())
+    Stride = std::max(Stride, LI->getAlign().value());
+
+  unsigned LoadSize = DL.getTypeSizeInBits(LoadTy) / 8;
+  for (uint64_t ByteOffset = ConstOffsetAcc.getZExtValue() + Stride;
+       ByteOffset <= GVSize - LoadSize; ByteOffset += Stride)
+    if (Ca != ConstantFoldLoadFromConst(
                  C, LoadTy, APInt(ConstOffsetAcc.getBitWidth(), ByteOffset),
                  DL))
+      return false;
+
+  I.replaceAllUsesWith(Ca);
+
+  return true;
+}
+
 /// This is the entry point for folds that could be implemented in regular
 /// InstCombine, but they are separated because they are not expected to
 /// occur frequently and/or have more than a constant-length pattern match.
@@ -878,6 +954,7 @@
     MadeChange |= tryToFPToSat(I, TTI);
     MadeChange |= tryToRecognizeTableBasedCttz(I);
     MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA);
+    MadeChange |= foldPatternedLoads(I, DL);
     // NOTE: This function introduces erasing of the instruction `I`, so it
     // needs to be called at the end of this sequence, otherwise we may make
     // bugs.
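
The stride computation in the patch relies on the fact that any byte offset reachable through a chain of GEPs whose variable indices are scaled by S1, ..., Sn is a multiple of gcd(S1, ..., Sn) (Bézout's identity). Below is a minimal standalone sketch of that reasoning, using the same APIntOps::GreatestCommonDivisor helper the patch uses; the scale and stride values are made up for illustration and are not taken from the patch or its tests.

// Sketch only: not part of the patch. Mirrors the GCD loop in
// foldPatternedLoads above to show how the minimum GEP stride is derived
// from the variable-index scales.
#include "llvm/ADT/APInt.h"
#include <cstdint>
#include <cstdio>

int main() {
  using namespace llvm;
  // Hypothetical scales of two variable GEP indices, in bytes (e.g. an i32
  // element type and a 6-byte struct element).
  uint64_t Scales[] = {4, 6};
  // Hypothetical source element type size of the outermost GEP, in bytes.
  uint64_t Stride = 12;
  for (uint64_t S : Scales) {
    APInt A(64, Stride), B(64, S);
    Stride = APIntOps::GreatestCommonDivisor(A, B).getZExtValue();
  }
  // gcd(12, 4, 6) == 2: every reachable offset is a multiple of 2 bytes, so
  // only loads at 2-byte steps need to be compared.
  printf("minimum stride: %llu\n", (unsigned long long)Stride);
  return 0;
}

In the transform itself, this stride is then enlarged to the load alignment when the global's alignment permits, which is what lets a whole patterned initializer be verified with only a handful of constant-fold queries.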