Index: lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCompares.cpp +++ lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -13,6 +13,7 @@ #include "InstCombineInternal.h" #include "llvm/ADT/APSInt.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -595,6 +596,320 @@ return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset"); } +/// Returns true if we can rewrite Start as a GEP with pointer Base +/// and some integer offset. The nodes that need to be re-written +/// for this transformation will be added to Explored. +static bool canRewriteGEPAsOffset(Value *Start, Value *Base, + const DataLayout &DL, + SetVector &Explored) { + SmallVector WorkList(1, Start); + Explored.insert(Base); + + // The following traversal gives us an order which can be used + // when doing the final transformation. Since in the final + // transformation we create the PHI replacement instructions first, + // we don't have to get them in any particular order. + // + // However, for other instructions we will have to traverse the + // operands of an instruction first, which means that we have to + // do a post-order traversal. + while (!WorkList.empty()) { + SetVector PHIs; + + while (!WorkList.empty()) { + if (Explored.size() >= 100) + return false; + + Value *V = WorkList.back(); + + if (Explored.count(V) != 0) { + WorkList.pop_back(); + continue; + } + + if (!isa(V) && !isa(V) && + !isa(V) && !isa(V)) + // We've found some value that we can't explore which is different from + // the base. Therefore we can't do this transformation. + return false; + + if (isa(V) || isa(V)) { + auto *CI = dyn_cast(V); + if (!CI->isNoopCast(DL)) + return nullptr; + + if (Explored.count(CI->getOperand(0)) == 0) + WorkList.push_back(CI->getOperand(0)); + } + + if (auto *GEP = dyn_cast(V)) { + // We're limiting the GEP to having one index. This will preserve + // the original pointer type. We could handle more cases in the + // future. + if (GEP->getNumIndices() != 1 || !GEP->isInBounds()) + return nullptr; + + if (Explored.count(GEP->getOperand(0)) == 0) + WorkList.push_back(GEP->getOperand(0)); + } + + if (WorkList.back() == V) { + WorkList.pop_back(); + // We've finished visiting this node, mark it as such. + Explored.insert(V); + } + + if (auto *PN = dyn_cast(V)) { + Explored.insert(PN); + PHIs.insert(PN); + } + } + + // Explore the PHI nodes further. + for (auto *PN : PHIs) + for (Value *Op : PN->incoming_values()) + if (Explored.count(Op) == 0) + WorkList.push_back(Op); + } + + // Make sure that we can do this. Since we can't insert GEPs in a basic + // block before a PHI node, we can't easily do this transformation if + // we have PHI node users of transformed instructions. + for (Value *Val : Explored) { + for (Value *Use : Val->uses()) { + + auto *PHI = dyn_cast(Use); + auto *Inst = dyn_cast(Val); + + if (Inst == Base || Inst == PHI || !Inst || !PHI || + Explored.count(PHI) == 0) + continue; + + if (PHI->getParent() == Inst->getParent()) + return false; + } + } + return true; +} + +// Sets the appropriate insert point on Builder where we can add +// a replacement Instruction for V (if that is possible). +static void setInsertionPoint(IRBuilder<> &Builder, Value *V, + bool Before = true) { + if (auto *PHI = dyn_cast(V)) { + Builder.SetInsertPoint(&*PHI->getParent()->getFirstInsertionPt()); + return; + } + if (auto *I = dyn_cast(V)) { + if (!Before) + I = &*std::next(I->getIterator()); + Builder.SetInsertPoint(I); + return; + } + if (auto *A = dyn_cast(V)) { + // Set the insertion point in the entry block. + BasicBlock &Entry = A->getParent()->getEntryBlock(); + Builder.SetInsertPoint(&*Entry.getFirstInsertionPt()); + return; + } + // Otherwise, this is a constant and we don't need to set a new + // insertion point. + assert(isa(V) && "Setting insertion point for unknown value!"); +} + +/// Returns a re-written value of Start as an indexed GEP using Base as a +/// pointer. +static Value *rewriteGEPAsOffset(Value *Start, Value *Base, + const DataLayout &DL, + SetVector &Explored) { + // Perform all the substitutions. This is a bit tricky because we can + // have cycles in our use-def chains. + // 1. Create the PHI nodes without any incoming values. + // 2. Create all the other values. + // 3. Add the edges for the PHI nodes. + // 4. Emit GEPs to get the original pointers. + // 5. Remove the original instructions. + Type *IndexType = IntegerType::get( + Base->getContext(), DL.getPointerTypeSizeInBits(Start->getType())); + + DenseMap NewInsts; + NewInsts[Base] = ConstantInt::getNullValue(IndexType); + + // Create the new PHI nodes, without adding any incoming values. + for (Value *Val : Explored) { + if (Val == Base) + continue; + // Create empty phi nodes. This avoids cyclic dependencies when creating + // the remaining instructions. + if (auto *PHI = dyn_cast(Val)) + NewInsts[PHI] = PHINode::Create(IndexType, PHI->getNumIncomingValues(), + PHI->getName() + ".idx", PHI); + } + IRBuilder<> Builder(Base->getContext()); + + // Create all the other instructions. + for (Value *Val : Explored) { + + if (NewInsts.find(Val) != NewInsts.end()) + continue; + + if (auto *CI = dyn_cast(Val)) { + NewInsts[CI] = NewInsts[CI->getOperand(0)]; + continue; + } + if (auto *GEP = dyn_cast(Val)) { + Value *Index = NewInsts[GEP->getOperand(1)] + ? NewInsts[GEP->getOperand(1)] + : GEP->getOperand(1); + setInsertionPoint(Builder, GEP); + // Indices might need to be sign extended. GEPs will magically do + // this, but we need to do it ourselves here. + if (Index->getType()->getScalarSizeInBits() != + NewInsts[GEP->getOperand(0)]->getType()->getScalarSizeInBits()) { + Index = Builder.CreateSExtOrTrunc( + Index, NewInsts[GEP->getOperand(0)]->getType(), + GEP->getOperand(0)->getName() + ".sext"); + } + NewInsts[GEP] = + Builder.CreateAdd(NewInsts[GEP->getOperand(0)], Index, + GEP->getOperand(0)->getName() + ".add"); + continue; + + } + if (isa(Val)) + continue; + + llvm_unreachable("Unexpected instruction type"); + } + + // Add the incoming values to the PHI nodes. + for (Value *Val : Explored) { + if (Val == Base) + continue; + // All the instructions have been created, we can now add edges to the + // phi nodes. + if (auto *PHI = dyn_cast(Val)) { + PHINode *NewPhi = static_cast(NewInsts[PHI]); + for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) { + Value *NewIncoming = PHI->getIncomingValue(I); + + if (NewInsts.find(NewIncoming) != NewInsts.end()) + NewIncoming = NewInsts[NewIncoming]; + + NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I)); + } + } + } + + // If required, create a inttoptr instruction. + Value *NewBase = Base; + setInsertionPoint(Builder, Base, false); + if (!Base->getType()->isPointerTy()) + NewBase = Builder.CreateBitOrPointerCast(Base, Start->getType(), + Start->getName() + "to.ptr"); + + for (Value *Val : Explored) { + if (Val == Base) + continue; + + // Depending on the type, for external users we have to emit + // a GEP or a GEP + ptrtoint. + if (isa(NewInsts[Val])) + setInsertionPoint(Builder, NewInsts[Val], false); + else + setInsertionPoint(Builder, NewBase, false); + + Value *GEP = + Builder.CreateInBoundsGEP(Start->getType()->getPointerElementType(), + NewBase, {NewInsts[Val]}, + Val->getName() + ".ptr"); + + if (!Val->getType()->isPointerTy()) { + Value *Cast = Builder.CreatePointerCast(GEP, Val->getType(), + Val->getName() + ".conv"); + GEP = Cast; + } + Val->replaceAllUsesWith(GEP); + } + + return NewInsts[Start]; +} + +/// Looks through GEPs, IntToPtrInsts and PtrToIntInsts in order to express +/// the input Value as a GEP constant indexed GEP. Returns a pair containing +/// the GEPs Pointer and Index. +static std::pair +getAsConstantIndexedAddress(Value *V, const DataLayout &DL) { + Type *IndexType = + IntegerType::get(V->getContext(), + DL.getPointerTypeSizeInBits(V->getType())); + + Constant *Index = ConstantInt::getNullValue(IndexType); + while (true) { + if (GEPOperator *GEP = dyn_cast(V)) { + // We accept only inbouds GEPs here to exclude the possibility of + // overflow. + if (!GEP->isInBounds()) + break; + if (GEP->hasAllConstantIndices() && GEP->getNumIndices() == 1) { + V = GEP->getOperand(0); + Constant *GEPIndex = static_cast(GEP->getOperand(1)); + Index = ConstantExpr::getAdd( + Index, ConstantExpr::getSExtOrBitCast(GEPIndex, IndexType)); + continue; + } + break; + } + if (auto *CI = dyn_cast(V)) { + if (!CI->isNoopCast(DL)) + break; + V = CI->getOperand(0); + continue; + } + if (auto *CI = dyn_cast(V)) { + if (!CI->isNoopCast(DL)) + break; + V = CI->getOperand(0); + continue; + } + break; + } + return {V, Index}; +} + +// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant. +// We can look through PHIs, GEPs and casts in order to determine a +// common base between GEPLHS and RHS. +static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS, + ICmpInst::Predicate Cond, + const DataLayout &DL) { + if (!GEPLHS->hasAllConstantIndices()) + return nullptr; + + Value *PtrBase, *Index; + std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL); + + // The set of nodes that will take part in this transformation. + SetVector Nodes; + + if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes)) + return nullptr; + + // We know we can re-write this as + // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) + // Since we've only looked through inbouds GEPs we know that we + // can't have overflow on either side. We can therefore re-write + // this as: + // OFFSET1 cmp OFFSET2 + Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes); + + // RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written + // GEP having PtrBase as the pointer base, and has returned in NewRHS the + // offset. Since Index is the offset of LHS to the base pointer, we will now + // compare the offsets instead of comparing the pointers. + return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Index, NewRHS); +} + /// FoldGEPICmp - Fold comparisons between a GEP instruction and something /// else. At this point we know that the GEP is on the LHS of the comparison. Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, @@ -674,8 +989,9 @@ } // Otherwise, the base pointers are different and the indices are - // different, bail out. - return nullptr; + // different. Try convert this to an indexed compare by looking through + // PHIs/casts. + return transformToIndexedCompare(GEPLHS, RHS, Cond, DL); } // If one of the GEPs has all zero indices, recurse. @@ -727,7 +1043,10 @@ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R); } } - return nullptr; + + // Try convert this to an indexed compare by looking through PHIs/casts as a + // last resort. + return transformToIndexedCompare(GEPLHS, RHS, Cond, DL); } Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca, Index: test/Transforms/InstCombine/indexed-gep-compares.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/indexed-gep-compares.ll @@ -0,0 +1,100 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64" + +define i32 *@test1(i32* %A, i32 %Offset) { +entry: + %tmp = getelementptr inbounds i32, i32* %A, i32 %Offset + br label %bb + +bb: + %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ] + %LHS = getelementptr inbounds i32, i32* %A, i32 100 + %RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1 + %cond = icmp ult i32 * %LHS, %RHS + br i1 %cond, label %bb2, label %bb + +bb2: + ret i32* %RHS + +; CHECK-LABEL: @test1( +; CHECK: %[[INDEX:[0-9A-Za-z.]+]] = phi i32 [ %[[ADD:[0-9A-Za-z.]+]], %bb ], [ %Offset, %entry ] +; CHECK: %[[ADD]] = add i32 %[[INDEX]], 1 +; CHECK: %cond = icmp sgt i32 %[[INDEX]], 100 +; CHECK: br i1 %cond, label %bb2, label %bb +; CHECK: %[[PTR:[0-9A-Za-z.]+]] = getelementptr inbounds i32, i32* %A, i32 %[[INDEX]] +; CHECK: ret i32* %[[PTR]] +} + +define i32 *@test2(i32 %A, i32 %Offset) { +entry: + %A.ptr = inttoptr i32 %A to i32* + %tmp = getelementptr inbounds i32, i32* %A.ptr, i32 %Offset + br label %bb + +bb: + %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ] + %LHS = getelementptr inbounds i32, i32* %A.ptr, i32 100 + %RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1 + %cmp0 = ptrtoint i32 *%LHS to i32 + %cmp1 = ptrtoint i32 *%RHS to i32 + %cond = icmp ult i32 %cmp0, %cmp1 + br i1 %cond, label %bb2, label %bb + +bb2: + ret i32* %RHS + +; CHECK-LABEL: @test2( +; CHECK: %[[TOPTR:[0-9A-Za-z.]+]] = inttoptr i32 %[[ADD:[0-9A-Za-z.]+]] to i32* +; CHECK: %[[INDEX:[0-9A-Za-z.]+]] = phi i32 [ %[[ADD:[0-9A-Za-z.]+]], %bb ], [ %Offset, %entry ] +; CHECK: %[[ADD]] = add i32 %[[INDEX]], 1 +; CHECK: %cond = icmp sgt i32 %[[INDEX]], 100 +; CHECK: br i1 %cond, label %bb2, label %bb +; CHECK: %[[PTR:[0-9A-Za-z.]+]] = getelementptr inbounds i32, i32* %[[TOPTR]], i32 %[[INDEX]] +; CHECK: ret i32* %[[PTR]] +} + +; Perform the transformation only if we know that the GEPs used are inbounds. +define i32 *@test3(i32* %A, i32 %Offset) { +entry: + %tmp = getelementptr i32, i32* %A, i32 %Offset + br label %bb + +bb: + %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ] + %LHS = getelementptr i32, i32* %A, i32 100 + %RHS.next = getelementptr i32, i32* %RHS, i64 1 + %cond = icmp ult i32 * %LHS, %RHS + br i1 %cond, label %bb2, label %bb + +bb2: + ret i32* %RHS + +; CHECK-LABEL: @test3( +; CHECK-NOT: %cond = icmp sgt i32 %{{[0-9A-Za-z.]+}}, 100 +} + +; An inttoptr that requires an extension or truncation will be opaque when determining +; the base pointer. In this case we can still perform the transformation by considering +; A.ptr as being the base pointer. +define i32 *@test4(i16 %A, i32 %Offset) { +entry: + %A.ptr = inttoptr i16 %A to i32* + %tmp = getelementptr inbounds i32, i32* %A.ptr, i32 %Offset + br label %bb + +bb: + %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ] + %LHS = getelementptr inbounds i32, i32* %A.ptr, i32 100 + %RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1 + %cmp0 = ptrtoint i32 *%LHS to i32 + %cmp1 = ptrtoint i32 *%RHS to i32 + %cond = icmp ult i32 %cmp0, %cmp1 + br i1 %cond, label %bb2, label %bb + +bb2: + ret i32* %RHS + +; CHECK-LABEL: @test4( +; CHECK: %cond = icmp sgt i32 %{{[0-9A-Za-z.]+}}, 100 +}