Index: llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp =================================================================== --- llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LogicCombine.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -852,6 +853,90 @@ return MadeChange; } +// Return a minimum gep stride, greatest common divisor of consective gep +// indices type sizes (c.f. Bézout's identity). Currently ignore the indices +// constantness and struct type. +uint64_t GetMinimumGEPStride(Value *PtrOp, const DataLayout DL) { + + if (auto GEP = dyn_cast(PtrOp)) { + uint64_t g; + auto Euc = [](uint64_t a, uint64_t b) { + if (a < b) { + uint64_t t = a; + a = b; + b = t; + } + while (b != 0) { + uint64_t r = a % b; + a = b; + b = r; + } + return a; + }; + g = DL.getTypeStoreSize(GEP->getSourceElementType()); + Value *V = GEP; + while (auto GEP = dyn_cast(V)) { + g = Euc(g, DL.getTypeStoreSize(GEP->getResultElementType())); + V = GEP->getPointerOperand(); + } + return g; + } + + return 1; +} + +/// If C is a constant patterned array and all valid loaded results for given +/// alignment are same to a constant, return that constant. +static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) { + + auto *LI = dyn_cast(&I); + if (!LI) + return false; + + // We can only fold the load if it is from a constant global with definitive + // initializer. Skip expensive logic if this is not the case. + auto *PtrOp = LI->getOperand(0); + auto *GV = dyn_cast(getUnderlyingObject(PtrOp)); + if (!GV || !GV->isConstant() || + (!GV->hasDefinitiveInitializer() && !GV->hasUniqueInitializer())) + return false; + + // if Global Variable's alignment is less than load's alignment + // then we can't compute valid offsets. + uint64_t LoadAlign = LI->getAlign().value(); + Type *LoadTy = LI->getType(); + if (GV->getAlign().valueOrOne().value() < LoadAlign) + return false; + Constant *C = GV->getInitializer(); + + unsigned GVSize = DL.getTypeStoreSize(C->getType()); + // Bail for large initializers in excess of 64K to avoid allocating + // too much memory. + if (!GVSize || 4096 < GVSize) + return false; + + unsigned LoadSize = LoadTy->getScalarSizeInBits() / 8; + const APInt Offset(DL.getTypeSizeInBits(C->getType()), 0); + Constant *Ca = ConstantFoldLoadFromConst( + C, LoadTy, APInt(DL.getTypeSizeInBits(C->getType()), 0), DL); + + // Any possible offset could be multiple of minimum GEP stride. And any valid + // offset is multiple of load alignment, so checking onle multiples of bigger + // one is sufficient to say results' equality. + uint64_t Stride = GetMinimumGEPStride(PtrOp, DL); + Stride = Stride < LoadAlign ? LoadAlign : Stride; + + for (uint64_t ByteOffset = Stride, E = GVSize - LoadSize; ByteOffset <= E; + ByteOffset += Stride) + if (Ca != ConstantFoldLoadFromConst( + C, LoadTy, + APInt(DL.getTypeSizeInBits(C->getType()), ByteOffset), DL)) + return false; + I.replaceAllUsesWith(Ca); + + return true; +} + /// This is the entry point for folds that could be implemented in regular /// InstCombine, but they are separated because they are not expected to /// occur frequently and/or have more than a constant-length pattern match. @@ -878,6 +963,7 @@ MadeChange |= tryToFPToSat(I, TTI); MadeChange |= tryToRecognizeTableBasedCttz(I); MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA); + MadeChange |= foldPatternedLoads(I, DL); // NOTE: This function introduces erasing of the instruction `I`, so it // needs to be called at the end of this sequence, otherwise we may make // bugs. Index: llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll @@ -0,0 +1,87 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=aggressive-instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=CHECK,LE +; RUN: opt < %s -passes=aggressive-instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=CHECK,BE + +@constarray = internal constant [8 x i8] c"\01\00\01\00\01\00\01\00", align 4 +@constpackedstruct = internal constant <{[8 x i8]}> <{[8 x i8] c"\01\00\01\00\01\00\01\00"}>, align 4 + +define i8 @gep_load_i8_align2(i64 %idx){ +; CHECK-LABEL: @gep_load_i8_align2( +; CHECK-NEXT: ret i8 1 +; + %1 = getelementptr inbounds i8, ptr @constarray, i64 %idx + %2 = load i8, ptr %1, align 2 + ret i8 %2 +} + +; can't be folded +define i8 @gep_load_i8_align1(i64 %idx){ +; CHECK-LABEL: @gep_load_i8_align1( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +; CHECK-NEXT: ret i8 [[TMP2]] +; + %1 = getelementptr inbounds i8, ptr @constarray, i64 %idx + %2 = load i8, ptr %1, align 1 + ret i8 %2 +} + +define i32 @gep_i32_load_i32_align4(i64 %idx){ +; LE-LABEL: @gep_i32_load_i32_align4( +; LE-NEXT: ret i32 65537 +; +; BE-LABEL: @gep_i32_load_i32_align4( +; BE-NEXT: ret i32 16777472 +; + %1 = getelementptr inbounds i32, ptr @constarray, i64 %idx + %2 = load i32, ptr %1, align 4 + ret i32 %2 +} + +define i32 @gep_i32_load_i32_align4_packedstruct(i64 %idx){ +; LE-LABEL: @gep_i32_load_i32_align4_packedstruct( +; LE-NEXT: ret i32 65537 +; +; BE-LABEL: @gep_i32_load_i32_align4_packedstruct( +; BE-NEXT: ret i32 16777472 +; + %1 = getelementptr inbounds i32, ptr @constpackedstruct, i64 %idx + %2 = load i32, ptr %1, align 4 + ret i32 %2 +} + +; can't be folded +define i32 @gep_i8_load_i32_align1(i64 %idx){ +; CHECK-LABEL: @gep_i8_load_i32_align1( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %1 = getelementptr inbounds i8, ptr @constarray, i64 %idx + %2 = load i32, ptr %1, align 1 + ret i32 %2 +} + +; can't be folded +define i32 @gep_i8_load_i32_align1_packedstruct(i64 %idx){ +; CHECK-LABEL: @gep_i8_load_i32_align1_packedstruct( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constpackedstruct, i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %1 = getelementptr inbounds i8, ptr @constpackedstruct, i64 %idx + %2 = load i32, ptr %1, align 1 + ret i32 %2 +} + +define i32 @gep_i16_load_i32_align1(i64 %idx){ +; LE-LABEL: @gep_i16_load_i32_align1( +; LE-NEXT: ret i32 65537 +; +; BE-LABEL: @gep_i16_load_i32_align1( +; BE-NEXT: ret i32 16777472 +; + %1 = getelementptr inbounds i16, ptr @constarray, i64 %idx + %2 = load i32, ptr %1, align 1 + ret i32 %2 +} Index: llvm/test/Transforms/InstSimplify/load-patterned-aggregates.ll =================================================================== --- llvm/test/Transforms/InstSimplify/load-patterned-aggregates.ll +++ /dev/null @@ -1,134 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=instsimplify -S | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" -@constzeroarray = internal constant [4 x i32] zeroinitializer - -@constarray = internal constant [8 x i8] c"\01\00\01\00\01\00\01\00", align 4 -@conststruct = internal constant <{[8 x i8]}> <{[8 x i8] c"\01\00\01\00\01\00\01\00"}>, align 4 - -define i32 @load_gep_const_zero_array(i64 %idx) { -; CHECK-LABEL: @load_gep_const_zero_array( -; CHECK-NEXT: ret i32 0 -; - %gep = getelementptr inbounds [4 x i32], ptr @constzeroarray, i64 0, i64 %idx - %load = load i32, ptr %gep - ret i32 %load -} - -define i8 @load_i8_multi_gep_const_zero_array(i64 %idx1, i64 %idx2) { -; CHECK-LABEL: @load_i8_multi_gep_const_zero_array( -; CHECK-NEXT: ret i8 0 -; - %gep1 = getelementptr inbounds i8, ptr @constzeroarray, i64 %idx1 - %gep = getelementptr inbounds i8, ptr %gep1, i64 %idx2 - %load = load i8, ptr %gep - ret i8 %load -} - - -define i32 @load_gep_const_patterned_array(i64 %idx) { -; CHECK-LABEL: @load_gep_const_patterned_array( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @constarray, i64 0, i64 [[IDX:%.*]] -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: ret i32 [[LOAD]] -; - %gep = getelementptr inbounds [4 x i32], ptr @constarray, i64 0, i64 %idx - %load = load i32, ptr %gep - ret i32 %load -} - -define i8 @load_i8_multi_gep_const_array(i64 %idx1, i64 %idx2) { -; CHECK-LABEL: @load_i8_multi_gep_const_array( -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX1:%.*]] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[GEP1]], i64 [[IDX2:%.*]] -; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1 -; CHECK-NEXT: ret i8 [[LOAD]] -; - %gep1 = getelementptr inbounds i8, ptr @constarray, i64 %idx1 - %gep = getelementptr inbounds i8, ptr %gep1, i64 %idx2 - %load = load i8, ptr %gep - ret i8 %load -} - -; TODO: this should be ret i8 1 -define i8 @gep_load_i8_align2(i64 %idx){ -; CHECK-LABEL: @gep_load_i8_align2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 2 -; CHECK-NEXT: ret i8 [[TMP2]] -; - %1 = getelementptr inbounds i8, ptr @constarray, i64 %idx - %2 = load i8, ptr %1, align 2 - ret i8 %2 -} - -; can't be folded -define i8 @gep_load_i8_align1(i64 %idx){ -; CHECK-LABEL: @gep_load_i8_align1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 -; CHECK-NEXT: ret i8 [[TMP2]] -; - %1 = getelementptr inbounds i8, ptr @constarray, i64 %idx - %2 = load i8, ptr %1, align 1 - ret i8 %2 -} - -; TODO: this should be ret i8 65537 on the case for little endian -define i32 @gep_i32_load_i32_align4(i64 %idx){ -; CHECK-LABEL: @gep_i32_load_i32_align4( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr @constarray, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -; CHECK-NEXT: ret i32 [[TMP2]] -; - %1 = getelementptr inbounds i32, ptr @constarray, i64 %idx - %2 = load i32, ptr %1, align 4 - ret i32 %2 -} - -; TODO: this should be ret i8 65537 on the case for little endian -define i32 @gep_i32_load_i32_align4_struct(i64 %idx){ -; CHECK-LABEL: @gep_i32_load_i32_align4_struct( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr @conststruct, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -; CHECK-NEXT: ret i32 [[TMP2]] -; - %1 = getelementptr inbounds i32, ptr @conststruct, i64 %idx - %2 = load i32, ptr %1, align 4 - ret i32 %2 -} - -; can't be folded -define i32 @gep_i8_load_i32_align1(i64 %idx){ -; CHECK-LABEL: @gep_i8_load_i32_align1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1 -; CHECK-NEXT: ret i32 [[TMP2]] -; - %1 = getelementptr inbounds i8, ptr @constarray, i64 %idx - %2 = load i32, ptr %1, align 1 - ret i32 %2 -} - -; can't be folded -define i32 @gep_i8_load_i32_align1_struct(i64 %idx){ -; CHECK-LABEL: @gep_i8_load_i32_align1_struct( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @conststruct, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1 -; CHECK-NEXT: ret i32 [[TMP2]] -; - %1 = getelementptr inbounds i8, ptr @conststruct, i64 %idx - %2 = load i32, ptr %1, align 1 - ret i32 %2 -} -; TODO: This could be folded but need to see GEP source types -define i32 @gep_i16_load_i32_align1(i64 %idx){ -; CHECK-LABEL: @gep_i16_load_i32_align1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @constarray, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1 -; CHECK-NEXT: ret i32 [[TMP2]] -; - %1 = getelementptr inbounds i16, ptr @constarray, i64 %idx - %2 = load i32, ptr %1, align 1 - ret i32 %2 -} Index: llvm/test/Transforms/InstSimplify/load.ll =================================================================== --- llvm/test/Transforms/InstSimplify/load.ll +++ llvm/test/Transforms/InstSimplify/load.ll @@ -3,6 +3,7 @@ @zeroinit = constant {} zeroinitializer @poison = constant {} poison +@constzeroarray = internal constant [4 x i32] zeroinitializer define i32 @crash_on_zeroinit() { ; CHECK-LABEL: @crash_on_zeroinit( @@ -40,3 +41,22 @@ %1 = load <3 x float>, ptr getelementptr inbounds (<3 x float>, ptr @constvec, i64 1) ret <3 x float> %1 } + +define i32 @load_gep_const_zero_array(i64 %idx) { +; CHECK-LABEL: @load_gep_const_zero_array( +; CHECK-NEXT: ret i32 0 +; + %gep = getelementptr inbounds [4 x i32], ptr @constzeroarray, i64 0, i64 %idx + %load = load i32, ptr %gep + ret i32 %load +} + +define i8 @load_i8_multi_gep_const_zero_array(i64 %idx1, i64 %idx2) { +; CHECK-LABEL: @load_i8_multi_gep_const_zero_array( +; CHECK-NEXT: ret i8 0 +; + %gep1 = getelementptr inbounds i8, ptr @constzeroarray, i64 %idx1 + %gep = getelementptr inbounds i8, ptr %gep1, i64 %idx2 + %load = load i8, ptr %gep + ret i8 %load +} Index: llvm/test/Transforms/PhaseOrdering/X86/nancvt.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/X86/nancvt.ll +++ llvm/test/Transforms/PhaseOrdering/X86/nancvt.ll @@ -24,21 +24,21 @@ define i32 @main() { ; CHECK-LABEL: @main( ; CHECK-NEXT: entry: -; CHECK-NEXT: store volatile i32 2147027116, ptr @var, align 4 -; CHECK-NEXT: store volatile i32 -1610612736, ptr @var, align 4 -; CHECK-NEXT: store volatile i32 2147027116, ptr @var, align 4 -; CHECK-NEXT: store volatile i32 -2147483648, ptr @var, align 4 -; CHECK-NEXT: store volatile i32 2147027116, ptr @var, align 4 -; CHECK-NEXT: store volatile i32 -1073741824, ptr @var, align 4 +; CHECK-NEXT: store volatile i32 poison, ptr @var, align 4 +; CHECK-NEXT: store volatile i32 poison, ptr @var, align 4 +; CHECK-NEXT: store volatile i32 poison, ptr @var, align 4 +; CHECK-NEXT: store volatile i32 poison, ptr @var, align 4 +; CHECK-NEXT: store volatile i32 poison, ptr @var, align 4 +; CHECK-NEXT: store volatile i32 poison, ptr @var, align 4 ; CHECK-NEXT: store volatile i32 2147228864, ptr @var, align 4 ; CHECK-NEXT: store volatile i32 2147228864, ptr @var, align 4 ; CHECK-NEXT: store volatile i32 2147228864, ptr @var, align 4 -; CHECK-NEXT: store volatile i32 2147027116, ptr @var, align 4 -; CHECK-NEXT: store volatile i32 -1610612736, ptr @var, align 4 -; CHECK-NEXT: store volatile i32 2147027116, ptr @var, align 4 -; CHECK-NEXT: store volatile i32 -2147483648, ptr @var, align 4 -; CHECK-NEXT: store volatile i32 2147027116, ptr @var, align 4 -; CHECK-NEXT: store volatile i32 -1073741824, ptr @var, align 4 +; CHECK-NEXT: store volatile i32 poison, ptr @var, align 4 +; CHECK-NEXT: store volatile i32 poison, ptr @var, align 4 +; CHECK-NEXT: store volatile i32 poison, ptr @var, align 4 +; CHECK-NEXT: store volatile i32 poison, ptr @var, align 4 +; CHECK-NEXT: store volatile i32 poison, ptr @var, align 4 +; CHECK-NEXT: store volatile i32 poison, ptr @var, align 4 ; CHECK-NEXT: store volatile i32 2147228864, ptr @var, align 4 ; CHECK-NEXT: store volatile i32 2147228864, ptr @var, align 4 ; CHECK-NEXT: store volatile i32 2147228864, ptr @var, align 4