Index: llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp =================================================================== --- llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -821,6 +821,48 @@ return true; } +// Calculate GEP Stride and accumulated const ModOffset. Return Stride and +// ModOffset +static std::pair<APInt, APInt> +getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL) { + unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType()); + std::optional<APInt> Stride; + APInt ModOffset(BW, 0); + // Return a minimum gep stride, greatest common divisor of consecutive gep + // index scales(c.f. Bézout's identity). + while (auto *GEP = dyn_cast<GEPOperator>(PtrOp)) { + MapVector<Value *, APInt> VarOffsets; + if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset)) + break; + + for (auto [V, Scale] : VarOffsets) { + // Only keep a power of two factor for non-inbounds + if (!GEP->isInBounds()) + Scale = APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero()); + + if (!Stride) + Stride = Scale; + else + Stride = APIntOps::GreatestCommonDivisor(*Stride, Scale); + } + + PtrOp = GEP->getPointerOperand(); + } + + // Check whether pointer arrives back at Global Variable via at least one GEP. + // Even if it doesn't, we can check by alignment. + if (!isa<GlobalVariable>(PtrOp) || !Stride) + return {APInt(BW, 1), APInt(BW, 0)}; + + // In consideration of signed GEP indices, non-negligible offset becomes + // remainder of division by minimum GEP stride. + ModOffset = ModOffset.srem(*Stride); + if (ModOffset.isNegative()) + ModOffset += *Stride; + + return {*Stride, ModOffset}; +} + /// If C is a constant patterned array and all valid loaded results for given /// alignment are same to a constant, return that constant. 
static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) { @@ -835,29 +877,24 @@ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) return false; - Type *LoadTy = LI->getType(); - Constant *C = GV->getInitializer(); - // Bail for large initializers in excess of 4K to avoid too many scans. + Constant *C = GV->getInitializer(); uint64_t GVSize = DL.getTypeAllocSize(C->getType()); if (!GVSize || 4096 < GVSize) return false; - // Check whether pointer arrives back at Global Variable. - // If PtrOp is neither GlobalVariable nor GEP, it might not arrive back at - // GlobalVariable. - // TODO: implement GEP handling + Type *LoadTy = LI->getType(); unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType()); - // TODO: Determine stride based on GEPs. - APInt Stride(BW, 1); - APInt ConstOffset(BW, 0); + auto [Stride, ConstOffset] = getStrideAndModOffsetOfGEP(PtrOp, DL); // Any possible offset could be multiple of GEP stride. And any valid // offset is multiple of load alignment, so checking only multiples of bigger // one is sufficient to say results' equality. 
if (auto LA = LI->getAlign(); - LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value()) + LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value()) { + ConstOffset = APInt(BW, 0); Stride = APInt(BW, LA.value()); + } Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL); if (!Ca) Index: llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll =================================================================== --- llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll +++ llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll @@ -12,6 +12,13 @@ @constpackedstruct = internal constant <{[8 x i8]}> <{[8 x i8] c"\01\00\01\00\01\00\01\00"}>, align 4 @conststruct = internal constant {i16, [8 x i8]} {i16 1, [8 x i8] c"\01\00\01\00\01\00\01\00"}, align 4 +%struct = type { i128 } +@global = internal constant %struct { i128 1 } +define i32 @no-gep-128-struct(i64 %idx){ + %1 = load i32, ptr @global, align 4 + ret i32 %1 +} + define i8 @inbounds_gep_load_i8_align2(i64 %idx){ ; CHECK-LABEL: @inbounds_gep_load_i8_align2( ; CHECK-NEXT: ret i8 1 @@ -48,47 +55,50 @@ declare ptr @llvm.ptrmask.p0.i64(ptr , i64) ; can't be folded because ptrmask can change ptr, while preserving provenance -define i8 @inbounds_gep_load_i8_align2_ptrmasked(i64 %idx, i64 %mask){ -; CHECK-LABEL: @inbounds_gep_load_i8_align2_ptrmasked( -; CHECK-NEXT: ret i8 1 +; This invalidates GEP indices analysis +define i8 @inbounds_gep_load_i16_align1_ptrmasked(i64 %idx, i64 %mask){ +; CHECK-LABEL: @inbounds_gep_load_i16_align1_ptrmasked( +; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr @constarray1, i64 [[MASK:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1 +; CHECK-NEXT: ret i8 [[TMP3]] ; %1 = call ptr @llvm.ptrmask.p0.i64(ptr @constarray1, i64 %mask) - %2 = getelementptr inbounds i8, ptr %1, i64 %idx - %3 = load i8, ptr %2, align 2 + 
%2 = getelementptr inbounds i16, ptr %1, i64 %idx + %3 = load i8, ptr %2, align 1 ret i8 %3 } -; TODO: this will be ret i32 65537(LE), 16777472(BE) define i32 @inbounds_gep_i16_load_i32_align1(i64 %idx){ -; CHECK-LABEL: @inbounds_gep_i16_load_i32_align1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @constarray1, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1 -; CHECK-NEXT: ret i32 [[TMP2]] +; LE-LABEL: @inbounds_gep_i16_load_i32_align1( +; LE-NEXT: ret i32 65537 +; +; BE-LABEL: @inbounds_gep_i16_load_i32_align1( +; BE-NEXT: ret i32 16777472 ; %1 = getelementptr inbounds i16, ptr @constarray1, i64 %idx %2 = load i32, ptr %1, align 1 ret i32 %2 } -; TODO: this will be ret i32 65537(LE), 16777472(BE) define i32 @inbounds_gep_i32_load_i32_align8(i64 %idx){ -; CHECK-LABEL: @inbounds_gep_i32_load_i32_align8( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr @constarray1, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -; CHECK-NEXT: ret i32 [[TMP2]] +; LE-LABEL: @inbounds_gep_i32_load_i32_align8( +; LE-NEXT: ret i32 65537 +; +; BE-LABEL: @inbounds_gep_i32_load_i32_align8( +; BE-NEXT: ret i32 16777472 ; %1 = getelementptr inbounds i32, ptr @constarray1, i64 %idx %2 = load i32, ptr %1, align 8 ret i32 %2 } -; TODO: this will be ret i32 65547(LE), 16777472(BE) define i32 @inbounds_gep_i32_load_i32_const_offset(i64 %idx){ -; CHECK-LABEL: @inbounds_gep_i32_load_i32_const_offset( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @constarray2, i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -; CHECK-NEXT: ret i32 [[TMP3]] +; LE-LABEL: @inbounds_gep_i32_load_i32_const_offset( +; LE-NEXT: ret i32 65537 +; +; BE-LABEL: @inbounds_gep_i32_load_i32_const_offset( +; BE-NEXT: ret i32 16777472 ; %1 = getelementptr inbounds i16, ptr @constarray2, i64 1 %2 = getelementptr inbounds 
i32, ptr %1, i64 %idx @@ -125,13 +135,9 @@ ret i32 %3 } -; TODO: this will be ret i32 42 define i32 @inbounds_gep_i32_load_i32_const_ptr_array(i64 %idx){ ; CHECK-LABEL: @inbounds_gep_i32_load_i32_const_ptr_array( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr @constptrarray, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK-NEXT: ret i32 42 ; %1 = getelementptr inbounds ptr, ptr @constptrarray, i64 %idx %2 = load ptr, ptr %1, align 4 @@ -163,16 +169,12 @@ ret i32 %2 } -; TODO: this coould be folded into 65537(LE), 16777472(BE) define i32 @inbounds_gep_i32_load_i32_align4_struct_with_const_offset(i64 %idx){ ; LE-LABEL: @inbounds_gep_i32_load_i32_align4_struct_with_const_offset( ; LE-NEXT: ret i32 65537 ; ; BE-LABEL: @inbounds_gep_i32_load_i32_align4_struct_with_const_offset( -; BE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @conststruct, i64 1 -; BE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDX:%.*]] -; BE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -; BE-NEXT: ret i32 [[TMP3]] +; BE-NEXT: ret i32 16777472 ; %1 = getelementptr inbounds i16, ptr @conststruct, i64 1 %2 = getelementptr inbounds i32, ptr %1, i64 %idx