diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1171,10 +1171,11 @@ return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI); } -Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, +Constant *llvm::ConstantFoldCompareInstOperands(unsigned IntPredicate, Constant *Ops0, Constant *Ops1, const DataLayout &DL, const TargetLibraryInfo *TLI) { + CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate; // fold: icmp (inttoptr x), null -> icmp x, 0 // fold: icmp null, (inttoptr x) -> icmp 0, x // fold: icmp (ptrtoint x), 0 -> icmp x, null @@ -1248,10 +1249,30 @@ Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; return ConstantFoldBinaryOpOperands(OpC, LHS, RHS, DL); } + + // Convert pointer comparison (base+offset1) pred (base+offset2) into + // offset1 pred offset2, for the case where the offset is inbounds. This + // only works for equality and unsigned comparison, as inbounds permits + // crossing the sign boundary. However, the offset comparison itself is + // signed. + if (Ops0->getType()->isPointerTy() && !ICmpInst::isSigned(Predicate)) { + unsigned IndexWidth = DL.getIndexTypeSizeInBits(Ops0->getType()); + APInt Offset0(IndexWidth, 0); + Value *Stripped0 = + Ops0->stripAndAccumulateInBoundsConstantOffsets(DL, Offset0); + APInt Offset1(IndexWidth, 0); + Value *Stripped1 = + Ops1->stripAndAccumulateInBoundsConstantOffsets(DL, Offset1); + if (Stripped0 == Stripped1) + return ConstantExpr::getCompare( + ICmpInst::getSignedPredicate(Predicate), + ConstantInt::get(CE0->getContext(), Offset0), + ConstantInt::get(CE0->getContext(), Offset1)); + } } else if (isa(Ops1)) { // If RHS is a constant expression, but the left side isn't, swap the // operands and try again. - Predicate = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)Predicate); + Predicate = ICmpInst::getSwappedPredicate(Predicate); return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI); } diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -1316,46 +1316,6 @@ return false; } -/// Compare the two constants as though they were getelementptr indices. -/// This allows coercion of the types to be the same thing. -/// -/// If the two constants are the "same" (after coercion), return 0. If the -/// first is less than the second, return -1, if the second is less than the -/// first, return 1. If the constants are not integral, return -2. -/// -static int IdxCompare(Constant *C1, Constant *C2, Type *ElTy) { - if (C1 == C2) return 0; - - // Ok, we found a different index. If they are not ConstantInt, we can't do - // anything with them. - if (!isa(C1) || !isa(C2)) - return -2; // don't know! - - // We cannot compare the indices if they don't fit in an int64_t. - if (cast(C1)->getValue().getActiveBits() > 64 || - cast(C2)->getValue().getActiveBits() > 64) - return -2; // don't know! - - // Ok, we have two differing integer indices. Sign extend them to be the same - // type. - int64_t C1Val = cast(C1)->getSExtValue(); - int64_t C2Val = cast(C2)->getSExtValue(); - - if (C1Val == C2Val) return 0; // They are equal - - // If the type being indexed over is really just a zero sized type, there is - // no pointer difference being made here. - if (isMaybeZeroSizedType(ElTy)) - return -2; // dunno. - - // If they are really different, now that they are the same type, then we - // found a difference! - if (C1Val < C2Val) - return -1; - else - return 1; -} - /// This function determines if there is anything we can decide about the two /// constants provided. This doesn't need to handle simple things like /// ConstantFP comparisons, but should instead handle ConstantExprs. @@ -1614,16 +1574,7 @@ if (!GV2->hasExternalWeakLinkage()) return ICmpInst::ICMP_ULT; } else if (const GlobalValue *GV = dyn_cast(CE1Op0)) { - if (GV == GV2) { - // If this is a getelementptr of the same global, then it must be - // different. Because the types must match, the getelementptr could - // only have at most one index, and because we fold getelementptr's - // with a single zero index, it must be nonzero. - assert(CE1->getNumOperands() == 2 && - !CE1->getOperand(1)->isNullValue() && - "Surprising getelementptr!"); - return ICmpInst::ICMP_UGT; - } else { + if (GV != GV2) { if (CE1GEP->hasAllZeroIndices()) return areGlobalsPotentiallyEqual(GV, GV2); return ICmpInst::BAD_ICMP_PREDICATE; @@ -1649,48 +1600,6 @@ cast(CE2Op0)); return ICmpInst::BAD_ICMP_PREDICATE; } - // Ok, we know that both getelementptr instructions are based on the - // same global. From this, we can precisely determine the relative - // ordering of the resultant pointers. - unsigned i = 1; - - // The logic below assumes that the result of the comparison - // can be determined by finding the first index that differs. - // This doesn't work if there is over-indexing in any - // subsequent indices, so check for that case first. - if (!CE1->isGEPWithNoNotionalOverIndexing() || - !CE2->isGEPWithNoNotionalOverIndexing()) - return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal. - - // Compare all of the operands the GEP's have in common. - gep_type_iterator GTI = gep_type_begin(CE1); - for (;i != CE1->getNumOperands() && i != CE2->getNumOperands(); - ++i, ++GTI) - switch (IdxCompare(CE1->getOperand(i), - CE2->getOperand(i), GTI.getIndexedType())) { - case -1: return ICmpInst::ICMP_ULT; - case 1: return ICmpInst::ICMP_UGT; - case -2: return ICmpInst::BAD_ICMP_PREDICATE; - } - - // Ok, we ran out of things they have in common. If any leftovers - // are non-zero then we have a difference, otherwise we are equal. - for (; i < CE1->getNumOperands(); ++i) - if (!CE1->getOperand(i)->isNullValue()) { - if (isa(CE1->getOperand(i))) - return ICmpInst::ICMP_UGT; - else - return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal. - } - - for (; i < CE2->getNumOperands(); ++i) - if (!CE2->getOperand(i)->isNullValue()) { - if (isa(CE2->getOperand(i))) - return ICmpInst::ICMP_ULT; - else - return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal. - } - return ICmpInst::ICMP_EQ; } } } diff --git a/llvm/test/Assembler/ConstantExprFold.ll b/llvm/test/Assembler/ConstantExprFold.ll --- a/llvm/test/Assembler/ConstantExprFold.ll +++ b/llvm/test/Assembler/ConstantExprFold.ll @@ -21,6 +21,7 @@ %Ty = type { i32, i32 } @B = external global %Ty +; @9 and @11 will be folded by the target-dependent constant folder instead. @9 = global i1 icmp ult (i64* @A, i64* getelementptr (i64, i64* @A, i64 1)) ; true @10 = global i1 icmp slt (i64* @A, i64* getelementptr (i64, i64* @A, i64 0)) ; false @11 = global i1 icmp ult (i32* getelementptr (%Ty, %Ty* @B, i64 0, i32 0), @@ -50,9 +51,9 @@ ; CHECK: @[[GLOB7:[0-9]+]] = global i64 -1 ; CHECK: @[[GLOB8:[0-9]+]] = global i64* @A ; CHECK: @[[B:[a-zA-Z0-9_$"\\.-]+]] = external global [[TY:%.*]] -; CHECK: @[[GLOB9:[0-9]+]] = global i1 true +; CHECK: @[[GLOB9:[0-9]+]] = global i1 icmp ugt (i64* getelementptr inbounds (i64, i64* @A, i64 1), i64* @A) ; CHECK: @[[GLOB10:[0-9]+]] = global i1 false -; CHECK: @[[GLOB11:[0-9]+]] = global i1 true +; CHECK: @[[GLOB11:[0-9]+]] = global i1 icmp ult (i32* getelementptr inbounds ([[TY:%.*]], %Ty* @B, i64 0, i32 0), i32* getelementptr inbounds ([[TY]], %Ty* @B, i64 0, i32 1)) ; CHECK: @[[CONS:[a-zA-Z0-9_$"\\.-]+]] = weak global i32 0, align 8 ; CHECK: @[[GLOB12:[0-9]+]] = global i64 0 ; CHECK: @[[GLOB13:[0-9]+]] = global <2 x i8*> undef diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll --- a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll @@ -198,9 +198,10 @@ ret i1 %cmp } +; This should not fold to true, as the offset is negative. define i1 @global_gep_ugt_global_neg_offset() { ; CHECK-LABEL: @global_gep_ugt_global_neg_offset( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: ret i1 icmp ugt ([2 x i32]* getelementptr ([2 x i32], [2 x i32]* @g, i64 -1), [2 x i32]* @g) ; %gep = getelementptr [2 x i32], [2 x i32]* @g, i64 -1 %cmp = icmp ugt [2 x i32]* %gep, @g @@ -239,7 +240,7 @@ define i1 @global_gep_ugt_global_gep_complex() { ; CHECK-LABEL: @global_gep_ugt_global_gep_complex( -; CHECK-NEXT: ret i1 icmp ugt (i32* bitcast (i8* getelementptr inbounds (i8, i8* bitcast ([2 x i32]* @g to i8*), i64 2) to i32*), i32* getelementptr inbounds ([2 x i32], [2 x i32]* @g, i64 0, i64 0)) +; CHECK-NEXT: ret i1 true ; %gep1 = getelementptr inbounds [2 x i32], [2 x i32]* @g, i64 0, i64 0 %gep2 = getelementptr inbounds [2 x i32], [2 x i32]* @g, i64 0, i64 0