Index: llvm/include/llvm/Analysis/ConstantFolding.h
===================================================================
--- llvm/include/llvm/Analysis/ConstantFolding.h
+++ llvm/include/llvm/Analysis/ConstantFolding.h
@@ -34,6 +34,8 @@
 class Instruction;
 class TargetLibraryInfo;
 class Type;
+class LoadInst;
+class Value;
 
 /// If this constant is a constant offset from a global, return the global and
 /// the constant. Because of constantexprs, this function is recursive.
@@ -170,6 +172,10 @@
 /// represented, return null.
 Constant *ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty);
 
+/// If C is a constant patterned aggregate and every possible load result from
+/// it is the same constant, return that constant. Otherwise return null.
+Constant *ConstantFoldLoadFromPatternedAggregate(
+    Constant *C, LoadInst *LI, Value *PtrOp, const DataLayout &DL);
+
 /// canConstantFoldCallTo - Return true if its even possible to fold a call to
 /// the specified function.
 bool canConstantFoldCallTo(const CallBase *Call, const Function *F);
Index: llvm/lib/Analysis/ConstantFolding.cpp
===================================================================
--- llvm/lib/Analysis/ConstantFolding.cpp
+++ llvm/lib/Analysis/ConstantFolding.cpp
@@ -767,6 +767,76 @@
   return nullptr;
 }
 
+#define DEBUG_TYPE "CU"
+Constant *llvm::ConstantFoldLoadFromPatternedAggregate(Constant *C,
+                                                       LoadInst *LI,
+                                                       Value *PtrOp,
+                                                       const DataLayout &DL) {
+  auto *CTy = C->getType();
+  uint64_t GVSize = 0;
+  LLVM_DEBUG(errs() << "store size = "
+                    << CTy->getPrimitiveSizeInBits() / 8 << "\n");
+  // TODO: Scalable vs. fixed size.
+  // TODO: Handle vector types and add a test for them.
+  // TODO: Calculate the correct size.
+  if (auto *CATy = dyn_cast<ArrayType>(CTy)) {
+    GVSize = CATy->getElementType()->getScalarSizeInBits() / 8 *
+             CATy->getNumElements();
+  } else if (auto *CSTy = dyn_cast<StructType>(CTy)) {
+    unsigned NumElm = CSTy->getNumElements();
+    LLVM_DEBUG(errs() << "struct! num = " << NumElm << "\n");
+    for (unsigned I = 0; I < NumElm; I++)
+      GVSize += CSTy->getElementType(I)->getPrimitiveSizeInBits() / 8;
+    LLVM_DEBUG(errs() << "struct! size = " << GVSize << "\n");
+  }
+
+  // Bail on large initializers in excess of 64K to avoid allocating too
+  // much memory.
+  if (GVSize > UINT16_MAX)
+    return nullptr;
+
+  if (GVSize) {
+    auto *LoadTy = LI->getType();
+    unsigned Al = LI->getAlign().value();
+    uint64_t LoadSize = LoadTy->getScalarSizeInBits() / 8;
+    if (!LoadSize || GVSize < LoadSize)
+      return nullptr;
+    LLVM_DEBUG(errs() << "LoadTy = " << *LoadTy << "\n");
+    LLVM_DEBUG(errs() << "Al = " << Al << "\n");
+    LLVM_DEBUG(errs() << "LoadSize = " << LoadSize << "\n");
+    LLVM_DEBUG(errs() << "GVSize = " << GVSize << "\n");
+
+    // The buffer must hold the whole initializer, since the comparison below
+    // indexes up to GVSize - 1.
+    SmallVector<unsigned char, 16> RawBytes(static_cast<size_t>(GVSize));
+    unsigned char *GVBytes = RawBytes.data();
+    if (!ReadDataFromGlobal(C, 0, GVBytes, GVSize, DL))
+      return nullptr;
+    for (unsigned ByteOffset = Al; ByteOffset <= GVSize - LoadSize;
+         ByteOffset += Al) {
+      LLVM_DEBUG(errs() << "ByteOffset = " << ByteOffset << "\n");
+      for (unsigned I = 0; I < LoadSize; I++)
+        if (GVBytes[I] != GVBytes[I + ByteOffset])
+          return nullptr;
+    }
+
+    // The buffer holds the bytes in memory order; swap them on big-endian
+    // targets.
+    if (!DL.isLittleEndian())
+      for (unsigned I = 0; I < LoadSize / 2; I++)
+        std::swap(GVBytes[I], GVBytes[LoadSize - 1 - I]);
+
+    // Convert the bytes into a constant of the load's type.
+    StringRef S(reinterpret_cast<const char *>(GVBytes), LoadSize);
+    Constant *CDA = ConstantDataArray::getRaw(S, 1, LoadTy);
+    Constant *Res = CDA->getAggregateElement(0U);
+
+    LLVM_DEBUG(errs() << "folded to "; Res->print(errs()); errs() << "\n");
+    return Res;
+  }
+
+  return nullptr;
+}
+#undef DEBUG_TYPE
+
 namespace {
 
 /// One of Op0/Op1 is a constant expression.
Index: llvm/lib/Analysis/InstructionSimplify.cpp
===================================================================
--- llvm/lib/Analysis/InstructionSimplify.cpp
+++ llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6634,16 +6634,22 @@
     return ConstantFoldLoadFromConstPtr(PtrOpC, LI->getType(), Q.DL);
 
   // We can only fold the load if it is from a constant global with definitive
-  // initializer. Skip expensive logic if this is not the case.
+  // or unique initializer. Skip expensive logic if this is not the case.
   auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(PtrOp));
-  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+  if (!GV || !GV->isConstant() ||
+      (!GV->hasDefinitiveInitializer() && !GV->hasUniqueInitializer()))
     return nullptr;
 
-  // If GlobalVariable's initializer is uniform, then return the constant
-  // regardless of its offset.
+  // If the GlobalVariable's initializer is uniform, or if every possible
+  // load from the array/struct yields the same value, return that constant
+  // regardless of the offset.
   if (Constant *C =
           ConstantFoldLoadFromUniformValue(GV->getInitializer(), LI->getType()))
     return C;
 
+  if (Constant *C = ConstantFoldLoadFromPatternedAggregate(
+          GV->getInitializer(), LI, PtrOp, Q.DL))
+    return C;
+
   // Try to convert operand into a constant by stripping offsets while looking
   // through invariant.group intrinsics.
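Note for reviewers: the core of the fold is the aligned-window comparison above. Below is a minimal standalone C++ sketch of that check, independent of the LLVM data structures; the helper name allAlignedLoadsEqual and the test values are illustrative, not part of the patch.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Returns true if every loadSize-byte window that starts at a multiple of
// `align` inside `bytes` is identical to the window at offset 0 -- the same
// condition the patch checks before folding the load.
static bool allAlignedLoadsEqual(const std::vector<uint8_t> &bytes,
                                 size_t loadSize, size_t align) {
  if (align == 0 || loadSize == 0 || bytes.size() < loadSize)
    return false;
  for (size_t off = align; off + loadSize <= bytes.size(); off += align)
    for (size_t i = 0; i < loadSize; ++i)
      if (bytes[i] != bytes[i + off])
        return false;
  return true;
}

int main() {
  // Mirrors @constarray = c"\01\00\01\00\01\00\01\00" from the tests.
  std::vector<uint8_t> pattern = {1, 0, 1, 0, 1, 0, 1, 0};
  // Every 2-byte window at an even offset is 01 00, so this folds.
  std::printf("i16 load, align 2: %d\n", allAlignedLoadsEqual(pattern, 2, 2));
  // Byte windows at align 1 see both 1 and 0, so this must not fold.
  std::printf("i8 load, align 1:  %d\n", allAlignedLoadsEqual(pattern, 1, 1));
}

This is why gep_load_i8_align2 in the tests below folds to ret i8 1 while the align-1 loads stay unfolded.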
Index: llvm/test/Transforms/InstSimplify/load-patterned-aggregates.ll
===================================================================
--- llvm/test/Transforms/InstSimplify/load-patterned-aggregates.ll
+++ llvm/test/Transforms/InstSimplify/load-patterned-aggregates.ll
@@ -6,6 +6,8 @@
 @constarray = internal constant [8 x i8] c"\01\00\01\00\01\00\01\00", align 4
 @conststruct = internal constant <{[8 x i8]}> <{[8 x i8] c"\01\00\01\00\01\00\01\00"}>, align 4
+@g = internal constant i8 42
+@constptrarray = internal constant [3 x ptr] [ptr @g, ptr @g, ptr @g], align 4
 
 define i32 @load_gep_const_zero_array(i64 %idx) {
 ; CHECK-LABEL: @load_gep_const_zero_array(
 ; CHECK-NEXT:    ret i32 0
@@ -25,39 +27,24 @@
   ret i8 %load
 }
 
-
-define i32 @load_gep_const_patterned_array(i64 %idx) {
-; CHECK-LABEL: @load_gep_const_patterned_array(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @constarray, i64 0, i64 [[IDX:%.*]]
-; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
-; CHECK-NEXT:    ret i32 [[LOAD]]
-;
-  %gep = getelementptr inbounds [4 x i32], ptr @constarray, i64 0, i64 %idx
-  %load = load i32, ptr %gep
-  ret i32 %load
-}
-
-define i8 @load_i8_multi_gep_const_array(i64 %idx1, i64 %idx2) {
-; CHECK-LABEL: @load_i8_multi_gep_const_array(
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX1:%.*]]
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[GEP1]], i64 [[IDX2:%.*]]
-; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
-; CHECK-NEXT:    ret i8 [[LOAD]]
-;
-  %gep1 = getelementptr inbounds i8, ptr @constarray, i64 %idx1
-  %gep = getelementptr inbounds i8, ptr %gep1, i64 %idx2
-  %load = load i8, ptr %gep
-  ret i8 %load
-}
-
-; TODO: this should be ret i8 1
 define i8 @gep_load_i8_align2(i64 %idx){
 ; CHECK-LABEL: @gep_load_i8_align2(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX:%.*]]
+; CHECK-NEXT:    ret i8 1
+;
+  %1 = getelementptr inbounds i8, ptr @constarray, i64 %idx
+  %2 = load i8, ptr %1, align 2
+  ret i8 %2
+}
+
+; This cannot fold to a concrete byte: the bytes of @g's address are not
+; known at compile time.
+define i8 @gep_load_i8_align2_ptrarray(i64 %idx){
+; CHECK-LABEL: @gep_load_i8_align2_ptrarray(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds ptr, ptr @constptrarray, i64 [[IDX:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 2
 ; CHECK-NEXT:    ret i8 [[TMP2]]
 ;
-  %1 = getelementptr inbounds i8, ptr @constarray, i64 %idx
+  %1 = getelementptr inbounds ptr, ptr @constptrarray, i64 %idx
   %2 = load i8, ptr %1, align 2
   ret i8 %2
 }
@@ -77,9 +64,7 @@
-; TODO: this should be ret i8 65537 on the case for little endian
+; On little-endian targets the repeating bytes 01 00 01 00 read as the i32
+; value 65537 (0x00010001).
 define i32 @gep_i32_load_i32_align4(i64 %idx){
 ; CHECK-LABEL: @gep_i32_load_i32_align4(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr @constarray, i64 [[IDX:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    ret i32 65537
 ;
   %1 = getelementptr inbounds i32, ptr @constarray, i64 %idx
   %2 = load i32, ptr %1, align 4
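The 65537 result above can be sanity-checked with a small standalone sketch of the byte assembly, assuming little-endian assembly of the memory-order bytes; the helper name loadLE32 is illustrative, not part of the patch. The patch's big-endian path is equivalent to reversing the bytes before this assembly.

#include <cstdint>
#include <cstdio>

// Assembles an i32 from four memory-order bytes the way a little-endian
// target's load would observe them.
static uint32_t loadLE32(const uint8_t b[4]) {
  return static_cast<uint32_t>(b[0]) | static_cast<uint32_t>(b[1]) << 8 |
         static_cast<uint32_t>(b[2]) << 16 | static_cast<uint32_t>(b[3]) << 24;
}

int main() {
  const uint8_t bytes[4] = {0x01, 0x00, 0x01, 0x00}; // @constarray's period
  // Prints 65537 (0x00010001), matching the CHECK line in
  // @gep_i32_load_i32_align4.
  std::printf("%u\n", loadLE32(bytes));
}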