Index: llvm/include/llvm/Transforms/InstCombine/InstCombiner.h =================================================================== --- llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -52,6 +52,8 @@ public: /// Maximum size of array considered when transforming. uint64_t MaxArraySizeForCombine = 0; + /// Maximum bitwidth of data considered when transforming. + uint64_t MaxDataSizeForCombine = 0; /// An IRBuilder that automatically inserts new instructions into the /// worklist. Index: llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -11,7 +11,9 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CaptureTracking.h" @@ -20,10 +22,13 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" @@ -108,9 +113,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst) { - if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() || - GV->getValueType() != GEP->getSourceElementType() || - !GV->isConstant() || !GV->hasDefinitiveInitializer()) + if (LI->isVolatile() 
|| !GV->isConstant() || !GV->hasDefinitiveInitializer()) return nullptr; Constant *Init = GV->getInitializer(); @@ -118,44 +121,30 @@ return nullptr; uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); + uint64_t DataSize = DL.getTypeAllocSize(Init->getType()); + // Don't blow up on huge arrays. if (ArrayElementCount > MaxArraySizeForCombine) return nullptr; - - // There are many forms of this optimization we can handle, for now, just do - // the simple index into a single-dimensional array. - // - // Require: GEP GV, 0, i {{, constant indices}} - if (GEP->getNumOperands() < 3 || - !isa(GEP->getOperand(1)) || - !cast(GEP->getOperand(1))->isZero() || - isa(GEP->getOperand(2))) + if (DataSize > MaxDataSizeForCombine) return nullptr; - // Check that indices after the variable are constants and in-range for the - // type they index. Collect the indices. This is typically for arrays of - // structs. - SmallVector LaterIndices; + Type *LoadedTy = LI->getType(); + uint64_t LoadedTySize = DL.getTypeAllocSize(LoadedTy); + uint64_t PtrBitwidth = DL.getIndexSizeInBits(GEP->getPointerAddressSpace()); + Type *PtrIdxTy = DL.getIndexType(GEP->getType()); - Type *EltTy = Init->getType()->getArrayElementType(); - for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { - ConstantInt *Idx = dyn_cast(GEP->getOperand(i)); - if (!Idx) return nullptr; // Variable index. + MapVector VariableOffset; + APInt ConstantOffset(PtrBitwidth, 0); + GEP->collectOffset(GEP->getModule()->getDataLayout(), PtrBitwidth, + VariableOffset, ConstantOffset); - uint64_t IdxVal = Idx->getZExtValue(); - if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index. 
+ // Do not fold when the GEP offset is fully constant (no variable index). + if (VariableOffset.size() == 0) + return nullptr; - if (StructType *STy = dyn_cast(EltTy)) - EltTy = STy->getElementType(IdxVal); - else if (ArrayType *ATy = dyn_cast(EltTy)) { - if (IdxVal >= ATy->getNumElements()) return nullptr; - EltTy = ATy->getElementType(); - } else { - return nullptr; // Unknown type. - } - - LaterIndices.push_back(IdxVal); - } + // There are many forms of this optimization we can handle. + // Fold: cmp(A[ax + by + ... + C], Rhs) <=> cmp(ax + by + ... + C, IndexRhs) enum { Overdefined = -3, Undefined = -2 }; @@ -185,18 +174,17 @@ // the array, this will fully represent all the comparison results. uint64_t MagicBitvector = 0; + Value *Idx = nullptr; + // Scan the array and see if one of our patterns matches. - Constant *CompareRHS = cast(ICI.getOperand(1)); - for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { - Constant *Elt = Init->getAggregateElement(i); - if (!Elt) return nullptr; - - // If this is indexing an array of structures, get the structure element. - if (!LaterIndices.empty()) { - Elt = ConstantFoldExtractValueInstruction(Elt, LaterIndices); - if (!Elt) - return nullptr; - } + Constant *ComparedRHS = cast(ICI.getOperand(1)); + // TODO: Make the step increment as large as possible, for performance. + for (uint64_t i = 0; i <= DataSize - LoadedTySize; ++i) { + APInt Offset(PtrBitwidth, i); + Constant *Elt = ConstantFoldLoadFromConstPtr(GV, LoadedTy, Offset, DL); + + if (!Elt) + return nullptr; // If the element is masked, handle it. if (AndCst) { @@ -207,7 +195,7 @@ // Find out if the comparison would be true or false for the i'th element. Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt, - CompareRHS, DL, &TLI); + ComparedRHS, DL, &TLI); // If the result is undef for this element, ignore it. if (isa(C)) { // Extend range state machines to cover this element in case there is an @@ -279,27 +267,16 @@ // Now that we've scanned the entire array, emit our new comparison(s). 
We // order the state machines in complexity of the generated code. - Value *Idx = GEP->getOperand(2); - - // If the index is larger than the pointer offset size of the target, truncate - // the index down like the GEP would do implicitly. We don't have to do this - // for an inbounds GEP because the index can't be out of range. - if (!GEP->isInBounds()) { - Type *PtrIdxTy = DL.getIndexType(GEP->getType()); - unsigned OffsetSize = PtrIdxTy->getIntegerBitWidth(); - if (Idx->getType()->getPrimitiveSizeInBits().getFixedValue() > OffsetSize) - Idx = Builder.CreateTrunc(Idx, PtrIdxTy); - } - // If inbounds keyword is not present, Idx * ElementSize can overflow. + // If inbounds keyword is not present, Idx can overflow. // Let's assume that ElementSize is 2 and the wanted value is at offset 0. // Then, there are two possible values for Idx to match offset 0: // 0x00..00, 0x80..00. // Emitting 'icmp eq Idx, 0' isn't correct in this case because the // comparison is false if Idx was 0x80..00. // We need to erase the highest countTrailingZeros(ElementSize) bits of Idx. - unsigned ElementSize = - DL.getTypeAllocSize(Init->getType()->getArrayElementType()); + // TODO: let ElementSize be the gcd(a,b,c,d,...) of the coefficients + unsigned ElementSize = 1; auto MaskIdx = [&](Value *Idx) { if (!GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0) { Value *Mask = ConstantInt::get(Idx->getType(), -1); @@ -309,10 +286,40 @@ return Idx; }; + auto GenerateIndexIfNull = [&](Value *CurIdx) { + if (CurIdx) + return CurIdx; + + Value *Idx = ConstantInt::get(PtrIdxTy->getContext(), ConstantOffset); + for (auto [Var, Coefficient] : VariableOffset) { + uint64_t VarBitWidth = Var->getType()->getScalarSizeInBits(); + uint64_t IdxBitWidth = Idx->getType()->getScalarSizeInBits(); + Type *WiderType = + VarBitWidth > IdxBitWidth ? 
Var->getType() : Idx->getType(); + + Var = Builder.CreateSExtOrTrunc(Var, WiderType); + Idx = Builder.CreateSExtOrTrunc(Idx, WiderType); + Value *Mul = Builder.CreateMul( + Var, + ConstantInt::get(WiderType, Coefficient.sextOrTrunc( + WiderType->getScalarSizeInBits()))); + Idx = Builder.CreateAdd(Idx, Mul); + } + + // If the index is larger than the pointer offset size of the target, + // truncate the index down like the GEP would do implicitly. We don't have + // to do this for an inbounds GEP because the index can't be out of range. + + if (Idx->getType()->getScalarSizeInBits() > PtrBitwidth) + Idx = Builder.CreateTrunc(Idx, PtrIdxTy); + + return Idx; + }; + // If the comparison is only true for one or two elements, emit direct // comparisons. if (SecondTrueElement != Overdefined) { - Idx = MaskIdx(Idx); + Idx = MaskIdx(GenerateIndexIfNull(Idx)); // None true -> false. if (FirstTrueElement == Undefined) return replaceInstUsesWith(ICI, Builder.getFalse()); @@ -333,7 +340,7 @@ // If the comparison is only false for one or two elements, emit direct // comparisons. if (SecondFalseElement != Overdefined) { - Idx = MaskIdx(Idx); + Idx = MaskIdx(GenerateIndexIfNull(Idx)); // None false -> true. if (FirstFalseElement == Undefined) return replaceInstUsesWith(ICI, Builder.getTrue()); @@ -355,7 +362,7 @@ // where it is true, emit the range check. if (TrueRangeEnd != Overdefined) { assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare"); - Idx = MaskIdx(Idx); + Idx = MaskIdx(GenerateIndexIfNull(Idx)); // Generate (i-FirstTrue) getType(), - TrueRangeEnd-FirstTrueElement+1); + Value *End = + ConstantInt::get(PtrIdxTy, TrueRangeEnd - FirstTrueElement + 1); return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End); } // False range check. if (FalseRangeEnd != Overdefined) { assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare"); - Idx = MaskIdx(Idx); + Idx = MaskIdx(GenerateIndexIfNull(Idx)); // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse). 
if (FirstFalseElement) { Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); Idx = Builder.CreateAdd(Idx, Offs); } - Value *End = ConstantInt::get(Idx->getType(), - FalseRangeEnd-FirstFalseElement); + Value *End = + ConstantInt::get(Idx->getType(), FalseRangeEnd - FirstFalseElement); return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End); } @@ -392,13 +399,15 @@ // Look for an appropriate type: // - The type of Idx if the magic fits // - The smallest fitting legal type - if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth()) - Ty = Idx->getType(); + uint64_t TraversedElementCount = DataSize - LoadedTySize + 1; + if (TraversedElementCount <= PtrIdxTy->getIntegerBitWidth()) + Ty = PtrIdxTy; else - Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount); + Ty = + DL.getSmallestLegalIntType(Init->getContext(), TraversedElementCount); if (Ty) { - Idx = MaskIdx(Idx); + Idx = MaskIdx(GenerateIndexIfNull(Idx)); Value *V = Builder.CreateIntCast(Idx, Ty, false); V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V); Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -4297,6 +4297,7 @@ InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT, ORE, BFI, PSI, DL, LI); IC.MaxArraySizeForCombine = MaxArraySize; + IC.MaxDataSizeForCombine = MaxArraySize * 8; if (!IC.run()) break; Index: llvm/test/Transforms/InstCombine/load-cmp.ll =================================================================== --- llvm/test/Transforms/InstCombine/load-cmp.ll +++ llvm/test/Transforms/InstCombine/load-cmp.ll @@ -25,8 +25,9 @@ define i1 @test1(i32 %X) { ; CHECK-LABEL: @test1( -; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[X:%.*]], 9 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: 
[[DOTMASK:%.*]] = and i32 [[X:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[DOTMASK]], 9 +; CHECK-NEXT: ret i1 [[TMP1]] ; %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X %Q = load i16, ptr %P @@ -36,9 +37,9 @@ define i1 @test1_noinbounds(i32 %X) { ; CHECK-LABEL: @test1_noinbounds( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 2147483647 -; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[DOTMASK:%.*]] = and i32 [[X:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[DOTMASK]], 9 +; CHECK-NEXT: ret i1 [[TMP1]] ; %P = getelementptr [10 x i16], ptr @G16, i32 0, i32 %X %Q = load i16, ptr %P @@ -48,9 +49,9 @@ define i1 @test1_noinbounds_i64(i64 %X) { ; CHECK-LABEL: @test1_noinbounds_i64( -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 2147483647 -; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[TMP1]], 9 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[DOTMASK1:%.*]] = and i64 [[X:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[DOTMASK1]], 9 +; CHECK-NEXT: ret i1 [[TMP1]] ; %P = getelementptr [10 x i16], ptr @G16, i64 0, i64 %X %Q = load i16, ptr %P @@ -60,9 +61,9 @@ define i1 @test1_noinbounds_as1(i32 %x) { ; CHECK-LABEL: @test1_noinbounds_as1( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 32767 -; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[DOTMASK1:%.*]] = and i32 [[X:%.*]], 32767 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[DOTMASK1]], 9 +; CHECK-NEXT: ret i1 [[TMP1]] ; %p = getelementptr [10 x i16], ptr addrspace(1) @G16_as1, i16 0, i32 %x %q = load i16, ptr addrspace(1) %p @@ -73,9 +74,9 @@ define i1 @test1_noinbounds_as2(i64 %x) { ; CHECK-LABEL: @test1_noinbounds_as2( -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 2147483647 -; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[TMP1]], 9 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[DOTMASK1:%.*]] = and i64 [[X:%.*]], 2147483647 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[DOTMASK1]], 9 
+; CHECK-NEXT: ret i1 [[TMP1]] ; %p = getelementptr [10 x i16], ptr addrspace(2) @G16_as2, i16 0, i64 %x %q = load i16, ptr addrspace(2) %p @@ -86,7 +87,10 @@ define i1 @test2(i32 %X) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[X:%.*]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 480341, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X @@ -97,7 +101,8 @@ define i1 @test3(i32 %X) { ; CHECK-LABEL: @test3( -; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[X:%.*]], 1 +; CHECK-NEXT: [[DOTMASK:%.*]] = and i32 [[X:%.*]], 536870911 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[DOTMASK]], 1 ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 %X @@ -109,9 +114,10 @@ define i1 @test4(i32 %X) { ; CHECK-LABEL: @test4( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 933, [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1 -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 476177, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X @@ -122,10 +128,11 @@ define i1 @test4_i16(i16 %X) { ; CHECK-LABEL: @test4_i16( -; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 933, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1 -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i32 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 476177, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP4]], 0 ; CHECK-NEXT: ret i1 
[[R]] ; %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i16 %X @@ -136,9 +143,10 @@ define i1 @test5(i32 %X) { ; CHECK-LABEL: @test5( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[X]], 7 -; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 14 +; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X @@ -149,8 +157,11 @@ define i1 @test6(i32 %X) { ; CHECK-LABEL: @test6( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -1 -; CHECK-NEXT: [[R:%.*]] = icmp ult i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 1095216660350, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP4]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 %X @@ -161,8 +172,11 @@ define i1 @test7(i32 %X) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -4 -; CHECK-NEXT: [[R:%.*]] = icmp ult i32 [[TMP1]], -3 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 1103806595201, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP4]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr inbounds [6 x double], ptr @GD, i32 0, i32 %X @@ -173,8 +187,10 @@ define i1 @test8(i32 %X) { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2 -; CHECK-NEXT: [[S:%.*]] = icmp eq i32 [[TMP1]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 502442, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and 
i32 [[TMP2]], 1 +; CHECK-NEXT: [[S:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK-NEXT: ret i1 [[S]] ; %P = getelementptr inbounds [10 x i16], ptr @G16, i32 0, i32 %X @@ -193,8 +209,11 @@ define i1 @test9(i32 %X) { ; CHECK-LABEL: @test9( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -1 -; CHECK-NEXT: [[R:%.*]] = icmp ult i32 [[TMP1]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 1052673, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP4]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr inbounds [4 x { i32, i32 } ], ptr @GA, i32 0, i32 %X, i32 1 @@ -266,7 +285,12 @@ define i1 @test10_struct_arr(i32 %x) { ; CHECK-LABEL: @test10_struct_arr( -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 72058693549555968, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP5]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr inbounds [4 x %Foo], ptr @GStructArr, i32 0, i32 %x, i32 2 @@ -277,8 +301,12 @@ define i1 @test10_struct_arr_noinbounds(i32 %x) { ; CHECK-LABEL: @test10_struct_arr_noinbounds( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 268435455 -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP1]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 72058693549555968, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP5]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr [4 x %Foo], ptr @GStructArr, i32 0, i32 %x, i32 2 @@ -289,7 +317,13 @@ define i1 @test10_struct_arr_i16(i16 %x) { ; 
CHECK-LABEL: @test10_struct_arr_i16( -; CHECK-NEXT: [[R:%.*]] = icmp ne i16 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[TMP1]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 4294967280 +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP3]], 8 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 72058693549555968, [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP6]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr inbounds [4 x %Foo], ptr @GStructArr, i16 0, i16 %x, i32 2 @@ -300,8 +334,12 @@ define i1 @test10_struct_arr_i64(i64 %x) { ; CHECK-LABEL: @test10_struct_arr_i64( -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967295 -; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[X:%.*]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 4294967280 +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP2]], 8 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 72058693549555968, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP5]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr inbounds [4 x %Foo], ptr @GStructArr, i64 0, i64 %x, i32 2 @@ -312,9 +350,13 @@ define i1 @test10_struct_arr_noinbounds_i16(i16 %x) { ; CHECK-LABEL: @test10_struct_arr_noinbounds_i16( -; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 268435455 -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP2]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[TMP1]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 4294967280 +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP3]], 8 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 72058693549555968, [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP6]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr [4 x %Foo], ptr @GStructArr, i32 0, i16 %x, 
i32 2 @@ -325,8 +367,12 @@ define i1 @test10_struct_arr_noinbounds_i64(i64 %x) { ; CHECK-LABEL: @test10_struct_arr_noinbounds_i64( -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 268435455 -; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[X:%.*]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 4294967280 +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP2]], 8 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 72058693549555968, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP5]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %p = getelementptr [4 x %Foo], ptr @GStructArr, i32 0, i64 %x, i32 2 @@ -338,6 +384,7 @@ @CG = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4] +; TODO: Fold it globally. define i1 @cmp_load_constant_array0(i64 %x){ ; CHECK-LABEL: @cmp_load_constant_array0( ; CHECK-NEXT: entry: @@ -346,10 +393,8 @@ ; CHECK: case2: ; CHECK-NEXT: ret i1 false ; CHECK: case1: -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32 -; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]] -; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4 -; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ult i32 [[ISOK]], 3 +; CHECK-NEXT: [[TMP0:%.*]] = and i64 [[X]], 1073741822 +; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp eq i64 [[TMP0]], 0 ; CHECK-NEXT: ret i1 [[COND_INFERRED]] ; entry: @@ -374,11 +419,7 @@ ; CHECK: case2: ; CHECK-NEXT: ret i1 false ; CHECK: case1: -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32 -; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]] -; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4 -; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ugt i32 [[ISOK]], 10 -; CHECK-NEXT: ret i1 [[COND_INFERRED]] +; CHECK-NEXT: ret i1 false ; entry: %cond = icmp ult i64 %x, 2 @@ -404,10 +445,11 @@ ; CHECK: case2: ; CHECK-NEXT: ret i1 false ; CHECK: case1: -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32 -; CHECK-NEXT: 
[[ISOK_PTR:%.*]] = getelementptr i32, ptr @CG_MESSY, i32 [[TMP0]] -; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4 -; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp slt i32 [[ISOK]], 5 +; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[X]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4294967292 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 4312859105, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 1 +; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ne i64 [[TMP3]], 0 ; CHECK-NEXT: ret i1 [[COND_INFERRED]] ; entry: @@ -451,4 +493,3 @@ %cond_inferred = icmp ult i32 %isOK, %y ret i1 %cond_inferred } -