Index: llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
===================================================================
--- llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -52,6 +52,8 @@
 public:
   /// Maximum size of array considered when transforming.
   uint64_t MaxArraySizeForCombine = 0;
 
+  /// Maximum bytes of data considered when transforming.
+  uint64_t MaxDataSizeForCombine = 0;
+
   /// An IRBuilder that automatically inserts new instructions into the
   /// worklist.
Index: llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -12,6 +12,7 @@
 
 #include "InstCombineInternal.h"
 #include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/CaptureTracking.h"
@@ -20,10 +21,12 @@
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
@@ -108,55 +111,34 @@
 Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
     LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI,
     ConstantInt *AndCst) {
-  if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() ||
-      GV->getValueType() != GEP->getSourceElementType() ||
-      !GV->isConstant() || !GV->hasDefinitiveInitializer())
+  if (LI->isVolatile() || !GV->isConstant() || !GV->hasDefinitiveInitializer())
     return nullptr;
 
   Constant *Init = GV->getInitializer();
-  if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
-    return nullptr;
+  uint64_t DataSize = DL.getTypeAllocSize(Init->getType());
 
-  uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
   // Don't blow up on huge arrays.
-  if (ArrayElementCount > MaxArraySizeForCombine)
-    return nullptr;
-
-  // There are many forms of this optimization we can handle, for now, just do
-  // the simple index into a single-dimensional array.
-  //
-  // Require: GEP GV, 0, i {{, constant indices}}
-  if (GEP->getNumOperands() < 3 ||
-      !isa<ConstantInt>(GEP->getOperand(1)) ||
-      !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
-      isa<Constant>(GEP->getOperand(2)))
+  if (DataSize > MaxDataSizeForCombine)
     return nullptr;
 
-  // Check that indices after the variable are constants and in-range for the
-  // type they index.  Collect the indices.  This is typically for arrays of
-  // structs.
-  SmallVector<unsigned, 4> LaterIndices;
+  Type *LoadedTy = LI->getType();
+  uint64_t LoadedTySize = DL.getTypeAllocSize(LoadedTy);
+  uint64_t PtrBitwidth = DL.getIndexSizeInBits(GEP->getPointerAddressSpace());
+  Type *PtrIdxTy = DL.getIndexType(GEP->getType());
 
-  Type *EltTy = Init->getType()->getArrayElementType();
-  for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
-    ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
-    if (!Idx) return nullptr; // Variable index.
-
-    uint64_t IdxVal = Idx->getZExtValue();
-    if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index.
-
-    if (StructType *STy = dyn_cast<StructType>(EltTy))
-      EltTy = STy->getElementType(IdxVal);
-    else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
-      if (IdxVal >= ATy->getNumElements()) return nullptr;
-      EltTy = ATy->getElementType();
-    } else {
-      return nullptr; // Unknown type.
-    }
-
-    LaterIndices.push_back(IdxVal);
-  }
+  MapVector<Value *, APInt> VariableOffsets;
+  APInt ConstantOffset(PtrBitwidth, 0);
+  GEP->collectOffset(GEP->getModule()->getDataLayout(), PtrBitwidth,
+                     VariableOffsets, ConstantOffset);
 
+  // There are many forms of this optimization we can handle; restrict it to a
+  // single variable offset for now.
+  // Possible TODO: Fold cmp(A[ax + by + ... + C], Rhs) <=>
+  // cmp(ax + by + ... + C, IndexRhs).
+  if (VariableOffsets.size() != 1)
+    return nullptr;
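+  // Worked example (hypothetical IR, for illustration only): for
+  //   getelementptr [4 x %S], ptr @G, i64 0, i64 %x, i32 2
+  // where %S is a 12-byte struct whose field 2 starts at byte 8,
+  // collectOffset() yields VariableOffsets = { %x -> 12 } and
+  // ConstantOffset = 8, i.e. the accessed byte offset is 12 * %x + 8.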
 
   enum { Overdefined = -3, Undefined = -2 };
 
   // Variables for our state machines.
@@ -185,18 +167,36 @@
   // the array, this will fully represent all the comparison results.
   uint64_t MagicBitvector = 0;
 
+  Value *Idx = nullptr;
+
   // Scan the array and see if one of our patterns matches.
-  Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
-  for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
-    Constant *Elt = Init->getAggregateElement(i);
-    if (!Elt) return nullptr;
-
-    // If this is indexing an array of structures, get the structure element.
-    if (!LaterIndices.empty()) {
-      Elt = ConstantFoldExtractValueInstruction(Elt, LaterIndices);
-      if (!Elt)
-        return nullptr;
-    }
+  Constant *ComparedRHS = cast<Constant>(ICI.getOperand(1));
+  APInt OffsetStep = VariableOffsets.front().second;
+  // The byte stride between two consecutive elements we scan.
+  uint64_t OffsetStepZExt = OffsetStep.getZExtValue();
+  // Offset from the constant pointer at which we begin scanning the constant.
+  int64_t BeginOffset = ConstantOffset.getSExtValue();
+
+  // Make BeginOffset the smallest offset >= 0 that is reachable from the
+  // constant offset in whole steps.
+  if (BeginOffset % OffsetStepZExt == 0)
+    BeginOffset = 0;
+  else if (BeginOffset < 0)
+    BeginOffset += (-BeginOffset / OffsetStepZExt + 1) * OffsetStepZExt;
+  else if (BeginOffset > 0)
+    BeginOffset -= (BeginOffset / OffsetStepZExt) * OffsetStepZExt;
+
+  uint64_t ElementCountToTraverse = (DataSize - BeginOffset) / OffsetStepZExt;
+
+  // Don't scan too many elements.
+  if (ElementCountToTraverse > MaxArraySizeForCombine)
+    return nullptr;
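+  // Worked example (values for illustration only): with DataSize = 16,
+  // OffsetStepZExt = 4 and a constant offset of -5, BeginOffset becomes 3,
+  // so we scan the initializer at byte offsets 3, 7 and 11, giving
+  // ElementCountToTraverse = (16 - 3) / 4 = 3.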
 
+  for (uint64_t i = 0; i < ElementCountToTraverse; ++i) {
+    APInt CurOffset(i * OffsetStep + BeginOffset);
+    Constant *Elt = ConstantFoldLoadFromConstPtr(GV, LoadedTy, CurOffset, DL);
+
+    if (!Elt)
+      return nullptr;
 
     // If the element is masked, handle it.
     if (AndCst) {
@@ -207,21 +207,22 @@
 
     // Find out if the comparison would be true or false for the i'th element.
     Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
-                                                  CompareRHS, DL, &TLI);
+                                                  ComparedRHS, DL, &TLI);
     // If the result is undef for this element, ignore it.
     if (isa<UndefValue>(C)) {
       // Extend range state machines to cover this element in case there is an
       // undef in the middle of the range.
-      if (TrueRangeEnd == (int)i-1)
+      if (TrueRangeEnd == (int)i - 1)
         TrueRangeEnd = i;
-      if (FalseRangeEnd == (int)i-1)
+      if (FalseRangeEnd == (int)i - 1)
         FalseRangeEnd = i;
       continue;
     }
 
     // If we can't compute the result for any of the elements, we have to give
     // up evaluating the entire conditional.
-    if (!isa<ConstantInt>(C)) return nullptr;
+    if (!isa<ConstantInt>(C))
+      return nullptr;
 
     // Otherwise, we know if the comparison is true or false for this element,
     // update our state machines.
@@ -231,7 +232,7 @@
     if (IsTrueForElt) {
       // Update the TrueElement state machine.
       if (FirstTrueElement == Undefined)
-        FirstTrueElement = TrueRangeEnd = i;  // First true element.
+        FirstTrueElement = TrueRangeEnd = i; // First true element.
       else {
         // Update double-compare state machine.
         if (SecondTrueElement == Undefined)
@@ -240,7 +241,7 @@
           SecondTrueElement = Overdefined;
 
         // Update range state machine.
-        if (TrueRangeEnd == (int)i-1)
+        if (TrueRangeEnd == (int)i - 1)
           TrueRangeEnd = i;
         else
           TrueRangeEnd = Overdefined;
@@ -257,7 +258,7 @@
           SecondFalseElement = Overdefined;
 
         // Update range state machine.
-        if (FalseRangeEnd == (int)i-1)
+        if (FalseRangeEnd == (int)i - 1)
           FalseRangeEnd = i;
         else
           FalseRangeEnd = Overdefined;
@@ -267,7 +268,6 @@
     // If this element is in range, update our magic bitvector.
     if (i < 64 && IsTrueForElt)
       MagicBitvector |= 1ULL << i;
-
     // If all of our states become overdefined, bail out early.  Since the
     // predicate is expensive, only check it every 8 elements.  This is only
     // really useful for really huge arrays.
@@ -279,40 +279,62 @@
 
   // Now that we've scanned the entire array, emit our new comparison(s). We
   // order the state machines in complexity of the generated code.
-  Value *Idx = GEP->getOperand(2);
-
-  // If the index is larger than the pointer offset size of the target, truncate
-  // the index down like the GEP would do implicitly.  We don't have to do this
-  // for an inbounds GEP because the index can't be out of range.
-  if (!GEP->isInBounds()) {
-    Type *PtrIdxTy = DL.getIndexType(GEP->getType());
-    unsigned OffsetSize = PtrIdxTy->getIntegerBitWidth();
-    if (Idx->getType()->getPrimitiveSizeInBits().getFixedValue() > OffsetSize)
-      Idx = Builder.CreateTrunc(Idx, PtrIdxTy);
-  }
 
-  // If inbounds keyword is not present, Idx * ElementSize can overflow.
-  // Let's assume that ElementSize is 2 and the wanted value is at offset 0.
+  // If the inbounds keyword is not present, Idx * OffsetStep can overflow.
+  // Let's assume that OffsetStep is 2 and the wanted value is at offset 0.
   // Then, there are two possible values for Idx to match offset 0:
   // 0x00..00, 0x80..00.
   // Emitting 'icmp eq Idx, 0' isn't correct in this case because the
   // comparison is false if Idx was 0x80..00.
-  // We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
-  unsigned ElementSize =
-      DL.getTypeAllocSize(Init->getType()->getArrayElementType());
+  // We need to erase the highest countTrailingZeros(OffsetStep) bits of Idx.
   auto MaskIdx = [&](Value *Idx) {
-    if (!GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0) {
+    if (!GEP->isInBounds() && llvm::countr_zero(OffsetStepZExt) != 0) {
       Value *Mask = ConstantInt::get(Idx->getType(), -1);
-      Mask = Builder.CreateLShr(Mask, llvm::countr_zero(ElementSize));
+      Mask = Builder.CreateLShr(Mask, llvm::countr_zero(OffsetStepZExt));
       Idx = Builder.CreateAnd(Idx, Mask);
     }
     return Idx;
   };
 
+  // Build the index expression lazily.
+  auto LazyGetIndex = [&](Value *CurIdx) {
+    if (CurIdx)
+      return CurIdx;
+
+    // Initial bias for the index. For example, when we fold C[x + 3] into
+    // x < 2, we actually regard it as x < 5 - 3.
+    Value *Idx =
+        ConstantInt::get(PtrIdxTy->getContext(),
+                         (BeginOffset - ConstantOffset).sdiv(OffsetStepZExt));
+    for (auto [Var, Coefficient] : VariableOffsets) {
+      uint64_t VarBitWidth = Var->getType()->getScalarSizeInBits();
+      uint64_t IdxBitWidth = Idx->getType()->getScalarSizeInBits();
+      Type *WiderType =
+          VarBitWidth > IdxBitWidth ? Var->getType() : Idx->getType();
+
+      Var = Builder.CreateSExtOrTrunc(Var, WiderType);
+      Idx = Builder.CreateSExtOrTrunc(Idx, WiderType);
+      APInt MinCoeffi = Coefficient.sdiv(OffsetStep)
+                            .sextOrTrunc(WiderType->getScalarSizeInBits());
+      Value *Mul =
+          Builder.CreateMul(Var, ConstantInt::get(WiderType, MinCoeffi));
+      Idx = Builder.CreateAdd(Idx, Mul);
+    }
+
+    // If the index is larger than the pointer offset size of the target,
+    // truncate the index down like the GEP would do implicitly. We don't have
+    // to do this for an inbounds GEP because the index can't be out of range.
+    if (!GEP->isInBounds() &&
+        Idx->getType()->getScalarSizeInBits() > PtrBitwidth)
+      Idx = Builder.CreateTrunc(Idx, PtrIdxTy);
+
+    return MaskIdx(Idx);
+  };
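+  // Worked example (for illustration only): suppose collectOffset() saw
+  // C[x + 3] with 4-byte elements as ConstantOffset = 12 and coefficient 4,
+  // and BeginOffset was normalized to 0. The initial bias is then
+  // (0 - 12) / 4 = -3 and the coefficient becomes 4 / 4 = 1, so
+  // LazyGetIndex materializes 'x + (-3)'.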
 
   // If the comparison is only true for one or two elements, emit direct
   // comparisons.
   if (SecondTrueElement != Overdefined) {
-    Idx = MaskIdx(Idx);
+    Idx = LazyGetIndex(Idx);
     // None true -> false.
     if (FirstTrueElement == Undefined)
       return replaceInstUsesWith(ICI, Builder.getFalse());
@@ -333,7 +355,7 @@
   // If the comparison is only false for one or two elements, emit direct
   // comparisons.
   if (SecondFalseElement != Overdefined) {
-    Idx = MaskIdx(Idx);
+    Idx = LazyGetIndex(Idx);
     // None false -> true.
     if (FirstFalseElement == Undefined)
       return replaceInstUsesWith(ICI, Builder.getTrue());
@@ -346,7 +368,8 @@
 
     // False for two elements -> 'i != 47 & i != 72'.
     Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx);
-    Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
+    Value *SecondFalseIdx =
+        ConstantInt::get(Idx->getType(), SecondFalseElement);
     Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx);
     return BinaryOperator::CreateAnd(C1, C2);
   }
@@ -355,7 +378,7 @@
   // where it is true, emit the range check.
   if (TrueRangeEnd != Overdefined) {
     assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
-    Idx = MaskIdx(Idx);
+    Idx = LazyGetIndex(Idx);
 
     // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
    if (FirstTrueElement) {
       Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
       Idx = Builder.CreateAdd(Idx, Offs);
     }
 
-    Value *End = ConstantInt::get(Idx->getType(),
-                                  TrueRangeEnd-FirstTrueElement+1);
+    Value *End =
+        ConstantInt::get(PtrIdxTy, TrueRangeEnd - FirstTrueElement + 1);
     return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
   }
 
   // False range check.
   if (FalseRangeEnd != Overdefined) {
     assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
-    Idx = MaskIdx(Idx);
+    Idx = LazyGetIndex(Idx);
 
     // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
     if (FirstFalseElement) {
       Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
       Idx = Builder.CreateAdd(Idx, Offs);
     }
 
-    Value *End = ConstantInt::get(Idx->getType(),
-                                  FalseRangeEnd-FirstFalseElement);
+    Value *End =
+        ConstantInt::get(Idx->getType(), FalseRangeEnd - FirstFalseElement);
     return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
   }
 
@@ -392,13 +415,15 @@
   // Look for an appropriate type:
   // - The type of Idx if the magic fits
   // - The smallest fitting legal type
-  if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
-    Ty = Idx->getType();
+
+  if (ElementCountToTraverse <= PtrIdxTy->getIntegerBitWidth())
+    Ty = PtrIdxTy;
   else
-    Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
+    Ty = DL.getSmallestLegalIntType(Init->getContext(),
+                                    ElementCountToTraverse);
 
   if (Ty) {
-    Idx = MaskIdx(Idx);
+    Idx = LazyGetIndex(Idx);
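+    // MagicBitvector has bit i set iff the compare is true for the i'th
+    // scanned element, so testing bit Idx (the 'lshr' then 'and 1' below)
+    // reproduces the compare result for any in-range index.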
     Value *V = Builder.CreateIntCast(Idx, Ty, false);
     V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
     V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V);
Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -4332,6 +4332,8 @@
     InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
                         ORE, BFI, PSI, DL, LI);
     IC.MaxArraySizeForCombine = MaxArraySize;
+    IC.MaxDataSizeForCombine = MaxArraySize * 8;
+
     bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT);
     MadeChangeInThisIteration |= IC.run();
     if (!MadeChangeInThisIteration)
Index: llvm/test/Transforms/InstCombine/load-cmp.ll
===================================================================
--- llvm/test/Transforms/InstCombine/load-cmp.ll
+++ llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -215,10 +215,7 @@
 
 define i1 @test10_struct_noinbounds(i32 %x) {
 ; CHECK-LABEL: @test10_struct_noinbounds(
-; CHECK-NEXT:    [[P:%.*]] = getelementptr [[FOO:%.*]], ptr @GS, i32 [[X:%.*]], i32 0
-; CHECK-NEXT:    [[Q:%.*]] = load i32, ptr [[P]], align 8
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[Q]], 9
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 false
 ;
   %p = getelementptr %Foo, ptr @GS, i32 %x, i32 0
   %q = load i32, ptr %p
@@ -252,11 +249,7 @@
 
 define i1 @test10_struct_noinbounds_i16(i16 %x) {
 ; CHECK-LABEL: @test10_struct_noinbounds_i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32
-; CHECK-NEXT:    [[P:%.*]] = getelementptr [[FOO:%.*]], ptr @GS, i32 [[TMP1]], i32 0
-; CHECK-NEXT:    [[Q:%.*]] = load i32, ptr [[P]], align 8
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[Q]], 0
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 false
 ;
   %p = getelementptr %Foo, ptr @GS, i16 %x, i32 0
   %q = load i32, ptr %p
@@ -266,7 +259,8 @@
 
 define i1 @test10_struct_arr(i32 %x) {
 ; CHECK-LABEL: @test10_struct_arr(
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], -3
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr inbounds [4 x %Foo], ptr @GStructArr, i32 0, i32 %x, i32 2
@@ -277,8 +271,8 @@
 
 define i1 @test10_struct_arr_noinbounds(i32 %x) {
 ; CHECK-LABEL: @test10_struct_arr_noinbounds(
-; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 268435455
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 268435453
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr [4 x %Foo], ptr @GStructArr, i32 0, i32 %x, i32 2
@@ -289,7 +283,8 @@
 
 define i1 @test10_struct_arr_i16(i16 %x) {
 ; CHECK-LABEL: @test10_struct_arr_i16(
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i16 [[X:%.*]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = and i16 [[X:%.*]], -3
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr inbounds [4 x %Foo], ptr @GStructArr, i16 0, i16 %x, i32 2
@@ -300,8 +295,8 @@
 
 define i1 @test10_struct_arr_i64(i64 %x) {
 ; CHECK-LABEL: @test10_struct_arr_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967295
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i64 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967293
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i64 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr inbounds [4 x %Foo], ptr @GStructArr, i64 0, i64 %x, i32 2
@@ -313,8 +308,8 @@
 
 define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
 ; CHECK-LABEL: @test10_struct_arr_noinbounds_i16(
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 268435455
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 268435453
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[TMP2]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr [4 x %Foo], ptr @GStructArr, i32 0, i16 %x, i32 2
@@ -325,8 +320,8 @@
 
 define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
 ; CHECK-LABEL: @test10_struct_arr_noinbounds_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 268435455
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i64 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 268435453
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i64 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr [4 x %Foo], ptr @GStructArr, i32 0, i64 %x, i32 2
@@ -338,6 +333,7 @@
 
 @CG = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
 
+; TODO: Fold it globally.
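+; Annotation: the folds below encode small true-sets as a mask test, e.g.
+; '(and i64 %x, 4294967294) == 0' (mask 0xFFFFFFFE) holds exactly when the
+; low 32 bits of %x are 0 or 1 -- the two elements of @CG (values 1 and 2)
+; that satisfy the original 'ult 3' compare.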
 define i1 @cmp_load_constant_array0(i64 %x){
 ; CHECK-LABEL: @cmp_load_constant_array0(
 ; CHECK-NEXT:  entry:
@@ -346,10 +342,8 @@
 ; CHECK:       case2:
 ; CHECK-NEXT:    ret i1 false
 ; CHECK:       case1:
-; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]]
-; CHECK-NEXT:    [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
-; CHECK-NEXT:    [[COND_INFERRED:%.*]] = icmp ult i32 [[ISOK]], 3
+; CHECK-NEXT:    [[TMP0:%.*]] = and i64 [[X]], 4294967294
+; CHECK-NEXT:    [[COND_INFERRED:%.*]] = icmp eq i64 [[TMP0]], 0
 ; CHECK-NEXT:    ret i1 [[COND_INFERRED]]
 ;
 entry:
@@ -374,11 +368,7 @@
 ; CHECK:       case2:
 ; CHECK-NEXT:    ret i1 false
 ; CHECK:       case1:
-; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]]
-; CHECK-NEXT:    [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
-; CHECK-NEXT:    [[COND_INFERRED:%.*]] = icmp ugt i32 [[ISOK]], 10
-; CHECK-NEXT:    ret i1 [[COND_INFERRED]]
+; CHECK-NEXT:    ret i1 false
 ;
 entry:
   %cond = icmp ult i64 %x, 2
@@ -405,9 +395,10 @@
 ; CHECK-NEXT:    ret i1 false
 ; CHECK:       case1:
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    [[ISOK_PTR:%.*]] = getelementptr i32, ptr @CG_MESSY, i32 [[TMP0]]
-; CHECK-NEXT:    [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
-; CHECK-NEXT:    [[COND_INFERRED:%.*]] = icmp slt i32 [[ISOK]], 5
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 1073741823
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 373, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 1
+; CHECK-NEXT:    [[COND_INFERRED:%.*]] = icmp ne i32 [[TMP3]], 0
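+; Annotation: 373 = 0b101110101 is the magic bitvector for @CG_MESSY under
+; the original 'slt 5' compare: bit i is 1 iff scanned element i satisfies
+; the compare, and the lshr/and-1 sequence above tests bit i of it.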
 ; CHECK-NEXT:    ret i1 [[COND_INFERRED]]
 ;
 entry:
@@ -427,9 +418,10 @@
 
 define i1 @cmp_diff_load_constant_array_messy0(i64 %x){
 ; CHECK-LABEL: @cmp_diff_load_constant_array_messy0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
-; CHECK-NEXT:    [[ISOK_PTR:%.*]] = getelementptr i32, ptr @CG_MESSY, i32 [[TMP1]]
-; CHECK-NEXT:    [[ISOK:%.*]] = load i16, ptr [[ISOK_PTR]], align 4
-; CHECK-NEXT:    [[COND_INFERRED:%.*]] = icmp slt i16 [[ISOK]], 5
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 373, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP3]], 1
+; CHECK-NEXT:    [[COND_INFERRED:%.*]] = icmp ne i32 [[TMP4]], 0
 ; CHECK-NEXT:    ret i1 [[COND_INFERRED]]
 ;
   %isOK_ptr = getelementptr i32, ptr @CG_MESSY, i64 %x
@@ -440,13 +432,13 @@
 
 define i1 @cmp_diff_load_constant_array_messy1(i64 %x){
 ; CHECK-LABEL: @cmp_diff_load_constant_array_messy1(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
-; CHECK-NEXT:    [[ISOK_PTR:%.*]] = getelementptr i6, ptr @CG_MESSY, i32 [[TMP1]]
-; CHECK-NEXT:    [[ISOK:%.*]] = load i16, ptr [[ISOK_PTR]], align 2
-; CHECK-NEXT:    [[COND_INFERRED:%.*]] = icmp slt i16 [[ISOK]], 5
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967295
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i64 66160388071, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 1
+; CHECK-NEXT:    [[COND_INFERRED:%.*]] = icmp ne i64 [[TMP3]], 0
 ; CHECK-NEXT:    ret i1 [[COND_INFERRED]]
 ;
-%isOK_ptr = getelementptr i6, ptr @CG_MESSY, i64 %x
+  %isOK_ptr = getelementptr i6, ptr @CG_MESSY, i64 %x
   %isOK = load i16, ptr %isOK_ptr
   %cond_inferred = icmp slt i16 %isOK, 5
   ret i1 %cond_inferred
@@ -479,4 +471,3 @@
   %cond_inferred = icmp ult i32 %isOK, %y
   ret i1 %cond_inferred
 }
-
Index: llvm/test/Transforms/InstCombine/opaque-ptr.ll
===================================================================
--- llvm/test/Transforms/InstCombine/opaque-ptr.ll
+++ llvm/test/Transforms/InstCombine/opaque-ptr.ll
@@ -475,10 +475,7 @@
 
 define i1 @cmp_load_gep_global_different_load_type(i64 %idx) {
 ; CHECK-LABEL: @cmp_load_gep_global_different_load_type(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [4 x i8], ptr @ary, i64 0, i64 [[IDX:%.*]]
-; CHECK-NEXT:    [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 [[LOAD]], 3
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 false
 ;
   %gep = getelementptr [4 x i8], ptr @ary, i64 0, i64 %idx
   %load = load i16, ptr %gep
@@ -488,10 +485,7 @@
 
 define i1 @cmp_load_gep_global_different_gep_type(i64 %idx) {
 ; CHECK-LABEL: @cmp_load_gep_global_different_gep_type(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [4 x i16], ptr @ary, i64 0, i64 [[IDX:%.*]]
-; CHECK-NEXT:    [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 [[LOAD]], 3
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 false
 ;
   %gep = getelementptr [4 x i16], ptr @ary, i64 0, i64 %idx
   %load = load i16, ptr %gep
   %cmp = icmp eq i16 %load, 3
   ret i1 %cmp
 }