Index: llvm/include/llvm/Analysis/ConstantFolding.h
===================================================================
--- llvm/include/llvm/Analysis/ConstantFolding.h
+++ llvm/include/llvm/Analysis/ConstantFolding.h
@@ -170,6 +170,12 @@
 /// represented, return null.
 Constant *ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty);
 
+/// If C is a non-empty constant array or struct in which every element is the
+/// same constant of type Ty, return that element; otherwise return null. The
+/// result is the value of a load of Ty only at offsets that are a whole number
+/// of elements into C, which the caller has to guarantee.
+Constant *ConstantFoldLoadFromAllEqAggregate(Constant *C, Type *Ty);
+
 /// canConstantFoldCallTo - Return true if its even possible to fold a call to
 /// the specified function.
 bool canConstantFoldCallTo(const CallBase *Call, const Function *F);
Index: llvm/lib/Analysis/ConstantFolding.cpp
===================================================================
--- llvm/lib/Analysis/ConstantFolding.cpp
+++ llvm/lib/Analysis/ConstantFolding.cpp
@@ -767,6 +767,40 @@
   return nullptr;
 }
 
+Constant *llvm::ConstantFoldLoadFromAllEqAggregate(Constant *C, Type *Ty) {
+  // Only arrays and structs are handled. An array whose element type is not
+  // Ty can be rejected without looking at any element.
+  Type *CTy = C->getType();
+  uint64_t NumElts = 0;
+  if (auto *ArrTy = dyn_cast<ArrayType>(CTy)) {
+    if (ArrTy->getElementType() != Ty)
+      return nullptr;
+    NumElts = ArrTy->getNumElements();
+  } else if (auto *StructTy = dyn_cast<StructType>(CTy)) {
+    NumElts = StructTy->getNumElements();
+  } else {
+    return nullptr;
+  }
+
+  // An empty aggregate has no first element to return, and
+  // getAggregateElement() only takes an unsigned index.
+  if (NumElts == 0 || NumElts > ~0U)
+    return nullptr;
+
+  // getAggregateElement() returns null when it cannot produce the element.
+  // Checking the first element's type up front also covers single-element
+  // structs, for which the comparison loop below never runs.
+  Constant *First = C->getAggregateElement(0U);
+  if (!First || First->getType() != Ty)
+    return nullptr;
+
+  // An element that is the same Constant object as First has its type too.
+  for (unsigned I = 1; I != NumElts; ++I)
+    if (C->getAggregateElement(I) != First)
+      return nullptr;
+  return First;
+}
+
 namespace {
 
 /// One of Op0/Op1 is a constant expression.
Index: llvm/lib/Analysis/InstructionSimplify.cpp
===================================================================
--- llvm/lib/Analysis/InstructionSimplify.cpp
+++ llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6599,16 +6599,30 @@
     return ConstantFoldLoadFromConstPtr(PtrOpC, LI->getType(), Q.DL);
 
   // We can only fold the load if it is from a constant global with definitive
   // initializer. Skip expensive logic if this is not the case.
   auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(PtrOp));
   if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
     return nullptr;
 
   // If GlobalVariable's initializer is uniform, then return the constant
   // regardless of its offset.
   if (Constant *C = ConstantFoldLoadFromUniformValue(GV->getInitializer(),
                                                      LI->getType()))
     return C;
 
+  // If every element of an array/struct initializer is the same constant, a
+  // load of one whole element yields that constant whichever element is
+  // addressed. That only holds at element boundaries, so require the load and
+  // the global to be aligned to a multiple of the element stride: a load at
+  // any other address would then violate its own alignment.
+  TypeSize EltSize = Q.DL.getTypeAllocSize(LI->getType());
+  uint64_t Stride = EltSize.getKnownMinValue();
+  if (!EltSize.isScalable() && Stride != 0 &&
+      LI->getAlign().value() % Stride == 0 &&
+      GV->getPointerAlignment(Q.DL).value() % Stride == 0)
+    if (Constant *C = ConstantFoldLoadFromAllEqAggregate(GV->getInitializer(),
+                                                         LI->getType()))
+      return C;
+
   // Try to convert operand into a constant by stripping offsets while looking
   // through invariant.group intrinsics.
Index: llvm/test/Transforms/InstSimplify/load.ll =================================================================== --- llvm/test/Transforms/InstSimplify/load.ll +++ llvm/test/Transforms/InstSimplify/load.ll @@ -84,22 +84,16 @@ define i32 @load_gep_const_alleq_array(i64 %idx) { ; CHECK-LABEL: @load_gep_const_alleq_array( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @constalleqarray, i64 0, i64 [[IDX:%.*]] -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: ret i32 [[LOAD]] +; CHECK-NEXT: ret i32 1 ; %gep = getelementptr inbounds [4 x i32], ptr @constalleqarray, i64 0, i64 %idx %load = load i32, ptr %gep ret i32 %load } -; TODO: fold following two all-eq aggregate type loads. define i8 @load_i8_multi_gep_const_alleq_array(i64 %idx1, i64 %idx2) { ; CHECK-LABEL: @load_i8_multi_gep_const_alleq_array( -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr @constalleqarrayi8, i64 [[IDX1:%.*]] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[GEP1]], i64 [[IDX2:%.*]] -; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1 -; CHECK-NEXT: ret i8 [[LOAD]] +; CHECK-NEXT: ret i8 1 ; %gep1 = getelementptr inbounds i8, ptr @constalleqarrayi8, i64 %idx1 %gep = getelementptr inbounds i8, ptr %gep1, i64 %idx2 @@ -109,9 +103,7 @@ define i32 @load_gep_const_alleq_struct(i32 %idx) { ; CHECK-LABEL: @load_gep_const_alleq_struct( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[S:%.*]], ptr @constalleqstruct, i32 [[IDX:%.*]] -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: ret i32 [[LOAD]] +; CHECK-NEXT: ret i32 1 ; %gep = getelementptr inbounds %s, ptr @constalleqstruct, i32 %idx %load = load i32, ptr %gep