Index: lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- lib/Transforms/InstCombine/InstCombineInternal.h +++ lib/Transforms/InstCombine/InstCombineInternal.h @@ -665,6 +665,10 @@ Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN); Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN); Instruction *FoldPHIArgZextsIntoPHI(PHINode &PN); + /// If an integer typed PHI has only one use which is an IntToPtr operation, + /// replace the PHI with an existing pointer typed PHI if it exists. Otherwise + /// insert a new pointer typed PHI and replace the original one. + Instruction *FoldIntegerTypedPHI(PHINode &PN); /// Helper function for FoldPHIArgXIntoPHI() to get debug location for the /// folded operation. Index: lib/Transforms/InstCombine/InstCombinePHI.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombinePHI.cpp +++ lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -39,6 +39,222 @@ return Loc; } +// Replace Integer typed PHI PN if the PHI's value is used as a pointer value. +// If there is an existing pointer typed PHI that produces the same value as PN, +// replace PN and the IntToPtr operation with it. Otherwise, synthesize a new +// PHI node: +// +// Case-1: +// bb1: +// int_init = PtrToInt(ptr_init) +// br label %bb2 +// bb2: +// int_val = PHI([int_init, %bb1], [int_val_inc, %bb2]) +// ptr_val = PHI([ptr_init, %bb1], [ptr_val_inc, %bb2]) +// ptr_val2 = IntToPtr(int_val) +// ... +// use(ptr_val2) +// ptr_val_inc = ... +// int_val_inc = PtrToInt(ptr_val_inc) +// +// ==> +// bb1: +// br label %bb2 +// bb2: +// ptr_val = PHI([ptr_init, %bb1], [ptr_val_inc, %bb2]) +// ... +// use(ptr_val) +// ptr_val_inc = ... +// +// Case-2: +// bb1: +// int_ptr = BitCast(ptr_ptr) +// int_init = Load(int_ptr) +// br label %bb2 +// bb2: +// int_val = PHI([int_init, %bb1], [int_val_inc, %bb2]) +// ptr_val2 = IntToPtr(int_val) +// ... +// use(ptr_val2) +// ptr_val_inc = ... 
+//     int_val_inc = PtrToInt(ptr_val_inc)
+// ==>
+// bb1:
+//     ptr_init = Load(ptr_ptr)
+//     br label %bb2
+// bb2:
+//     ptr_val = PHI([ptr_init, %bb1], [ptr_val_inc, %bb2])
+//     ...
+//     use(ptr_val)
+//     ptr_val_inc = ...
+//     ...
+//
+// Returns nullptr when PN is left untouched. Otherwise returns a newly
+// created (not yet inserted) cast of the pointer typed PHI back to the
+// integer type of PN; the PtrToInt + IntToPtr pair this leaves behind is
+// expected to be cleaned up by later simplification.
+//
+// The fold is attempted only when all of the following hold:
+//  - PN is integer typed and its single user is an IntToPtr;
+//  - that IntToPtr is the pointer operand of a load, store or GEP;
+//  - the integer is exactly as wide as a pointer in the IntToPtr's address
+//    space;
+//  - every incoming value is a PtrToInt, has an IntToPtr user of the same
+//    type that is available in the incoming block, is itself a PHI, or is a
+//    single-use integer load.
+//
+Instruction *InstCombiner::FoldIntegerTypedPHI(PHINode &PN) {
+  if (!PN.getType()->isIntegerTy())
+    return nullptr;
+  if (!PN.hasOneUse())
+    return nullptr;
+
+  auto *IntToPtr = dyn_cast<IntToPtrInst>(PN.user_back());
+  if (!IntToPtr)
+    return nullptr;
+
+  // Check if the pointer is actually used as pointer:
+  auto HasPointerUse = [](Instruction *IIP) {
+    for (User *U : IIP->users()) {
+      Value *Ptr = nullptr;
+      if (LoadInst *LoadI = dyn_cast<LoadInst>(U)) {
+        Ptr = LoadI->getPointerOperand();
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+        Ptr = SI->getPointerOperand();
+      } else if (GetElementPtrInst *GI = dyn_cast<GetElementPtrInst>(U)) {
+        Ptr = GI->getPointerOperand();
+      }
+
+      // Being the *stored value* of a store does not count; only the
+      // address operand does.
+      if (Ptr && Ptr == IIP)
+        return true;
+    }
+    return false;
+  };
+
+  if (!HasPointerUse(IntToPtr))
+    return nullptr;
+
+  // The IntToPtr must be a pure reinterpretation (no truncation/extension).
+  if (DL.getPointerSizeInBits(IntToPtr->getAddressSpace()) !=
+      DL.getTypeSizeInBits(IntToPtr->getOperand(0)->getType()))
+    return nullptr;
+
+  // AvailablePtrVals[i] is the value that will stand in for incoming value i
+  // of PN. It is either already of the IntToPtr's type, or it is fixed up with
+  // a cast when the new PHI is synthesized below.
+  SmallVector<Value *, 4> AvailablePtrVals;
+  for (unsigned i = 0; i != PN.getNumIncomingValues(); ++i) {
+    Value *Arg = PN.getIncomingValue(i);
+
+    // First look backward:
+    if (auto *PI = dyn_cast<PtrToIntInst>(Arg)) {
+      AvailablePtrVals.emplace_back(PI->getOperand(0));
+      continue;
+    }
+
+    // Next look forward:
+    Value *ArgIntToPtr = nullptr;
+    for (User *U : Arg->users()) {
+      if (isa<IntToPtrInst>(U) && U->getType() == IntToPtr->getType() &&
+          (DT.dominates(cast<Instruction>(U), PN.getIncomingBlock(i)) ||
+           cast<Instruction>(U)->getParent() == PN.getIncomingBlock(i))) {
+        ArgIntToPtr = U;
+        break;
+      }
+    }
+
+    if (ArgIntToPtr) {
+      AvailablePtrVals.emplace_back(ArgIntToPtr);
+      continue;
+    }
+
+    // If Arg is defined by a PHI, allow it. This will also create
+    // more opportunities iteratively.
+    if (isa<PHINode>(Arg)) {
+      AvailablePtrVals.emplace_back(Arg);
+      continue;
+    }
+
+    // For a single use integer load:
+    auto *LoadI = dyn_cast<LoadInst>(Arg);
+    if (!LoadI)
+      return nullptr;
+
+    if (!LoadI->hasOneUse())
+      return nullptr;
+
+    // Push the integer typed Load instruction into the available
+    // value set, and fix it up later when the pointer typed PHI
+    // is synthesized.
+    AvailablePtrVals.emplace_back(LoadI);
+  }
+
+  // Now search for a matching PHI
+  auto *BB = PN.getParent();
+  assert(AvailablePtrVals.size() == PN.getNumIncomingValues() &&
+         "Not enough available ptr typed incoming values");
+  PHINode *MatchingPtrPHI = nullptr;
+  for (auto II = BB->begin(), EI = BasicBlock::iterator(BB->getFirstNonPHI());
+       II != EI; ++II) {
+    PHINode *PtrPHI = dyn_cast<PHINode>(II);
+    // Skip PN itself and any PHI of the wrong type. AvailablePtrVals may
+    // contain integer typed PHIs, so an unrelated integer PHI in this block
+    // could otherwise compare equal operand by operand.
+    if (!PtrPHI || PtrPHI == &PN || PtrPHI->getType() != IntToPtr->getType())
+      continue;
+    MatchingPtrPHI = PtrPHI;
+    for (unsigned i = 0; i != PN.getNumIncomingValues(); ++i) {
+      // Two PHIs of one block need not list their predecessors in the same
+      // order, so compare per incoming block rather than per operand index.
+      if (AvailablePtrVals[i] !=
+          PtrPHI->getIncomingValueForBlock(PN.getIncomingBlock(i))) {
+        MatchingPtrPHI = nullptr;
+        break;
+      }
+    }
+
+    if (MatchingPtrPHI)
+      break;
+  }
+
+  if (MatchingPtrPHI) {
+    assert(MatchingPtrPHI->getType() == IntToPtr->getType() &&
+           "Phi's Type does not match with IntToPtr");
+    // The PtrToInt + IntToPtr will be simplified later
+    return CastInst::CreateBitOrPointerCast(MatchingPtrPHI,
+                                            IntToPtr->getOperand(0)->getType());
+  }
+
+  // If it requires a conversion for every PHI operand, do not do it.
+  if (std::all_of(AvailablePtrVals.begin(), AvailablePtrVals.end(),
+                  [&](Value *V) {
+                    return (V->getType() != IntToPtr->getType()) ||
+                           isa<IntToPtrInst>(V);
+                  }))
+    return nullptr;
+
+  // A cast is inserted right after the value it converts. That is impossible
+  // when the value is produced by a terminator (e.g. an invoke), so give up.
+  if (std::any_of(AvailablePtrVals.begin(), AvailablePtrVals.end(),
+                  [&](Value *V) {
+                    auto *I = dyn_cast<Instruction>(V);
+                    return V->getType() != IntToPtr->getType() && I &&
+                           I->isTerminator();
+                  }))
+    return nullptr;
+
+  PHINode *NewPtrPHI = PHINode::Create(
+      IntToPtr->getType(), PN.getNumIncomingValues(), PN.getName() + ".ptr");
+
+  InsertNewInstBefore(NewPtrPHI, PN);
+  for (unsigned i = 0; i != PN.getNumIncomingValues(); ++i) {
+    auto *IncomingBB = PN.getIncomingBlock(i);
+    auto *IncomingVal = AvailablePtrVals[i];
+
+    if (IncomingVal->getType() == IntToPtr->getType()) {
+      NewPtrPHI->addIncoming(IncomingVal, IncomingBB);
+      continue;
+    }
+
+#ifndef NDEBUG
+    // A value reaching this point is an integer PHI, a single-use integer
+    // load, or a pointer of a different pointer type (the operand of a
+    // PtrToInt) that only needs a bitcast.
+    LoadInst *LoadI = dyn_cast<LoadInst>(IncomingVal);
+    assert((isa<PHINode>(IncomingVal) ||
+            IncomingVal->getType()->isPointerTy() ||
+            (LoadI && LoadI->hasOneUse())) &&
+           "Can not replace LoadInst with multiple uses");
+#endif
+    // The same value may feed several entries (a predecessor can be listed
+    // more than once, and such entries must carry identical values). Reuse
+    // the cast that was created for an earlier entry instead of making a
+    // second, distinct instruction. NewPtrPHI already holds one incoming
+    // value for every index below i.
+    Value *PtrVal = nullptr;
+    for (unsigned j = 0; j != i; ++j) {
+      if (AvailablePtrVals[j] == IncomingVal) {
+        PtrVal = NewPtrPHI->getIncomingValue(j);
+        break;
+      }
+    }
+
+    if (!PtrVal) {
+      // Need to insert a BitCast.
+      // For an integer Load instruction with a single use, the load + IntToPtr
+      // cast will be simplified into a pointer load:
+      // %v = load i64, i64* %a.ip, align 8
+      // %v.cast = inttoptr i64 %v to float **
+      // ==>
+      // %v.ptrp = bitcast i64 * %a.ip to float **
+      // %v.cast = load float *, float ** %v.ptrp, align 8
+      auto *CI = CastInst::CreateBitOrPointerCast(
+          IncomingVal, IntToPtr->getType(), IncomingVal->getName() + ".ptr");
+      if (auto *IncomingI = dyn_cast<Instruction>(IncomingVal)) {
+        // Place the cast directly after the definition, or after the PHI
+        // group when the definition is itself a PHI.
+        BasicBlock::iterator InsertPos(IncomingI);
+        ++InsertPos;
+        if (isa<PHINode>(IncomingI))
+          InsertPos = IncomingI->getParent()->getFirstInsertionPt();
+        InsertNewInstBefore(CI, *InsertPos);
+      } else {
+        // Arguments and other non-instruction values are available
+        // everywhere; cast them once in the entry block.
+        auto *InsertBB = &IncomingBB->getParent()->getEntryBlock();
+        InsertNewInstBefore(CI, *InsertBB->getFirstInsertionPt());
+      }
+      PtrVal = CI;
+    }
+    NewPtrPHI->addIncoming(PtrVal, IncomingBB);
+  }
+
+  // The PtrToInt + IntToPtr will be simplified later
+  return CastInst::CreateBitOrPointerCast(NewPtrPHI,
+                                          IntToPtr->getOperand(0)->getType());
+}
+
 /// If we have something like phi [add (a,b), add(a,c)] and if a/b/c and the
 /// adds all have a single use, turn this into a phi and a single binop.
Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { @@ -902,6 +1118,9 @@ // this PHI only has a single use (a PHI), and if that PHI only has one use (a // PHI)... break the cycle. if (PN.hasOneUse()) { + if (Instruction *Result = FoldIntegerTypedPHI(PN)) + return Result; + Instruction *PHIUser = cast(PN.user_back()); if (PHINode *PU = dyn_cast(PHIUser)) { SmallPtrSet PotentiallyDeadPHIs; Index: test/Transforms/InstCombine/intptr1.ll =================================================================== --- test/Transforms/InstCombine/intptr1.ll +++ test/Transforms/InstCombine/intptr1.ll @@ -0,0 +1,181 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + + +define void @test1(float* %a, float* readnone %a_end, i64* %b.i64) { +; CHECK-LABEL: @test1 +entry: + %cmp1 = icmp ult float* %a, %a_end + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %b = load i64, i64* %b.i64, align 8 +; CHECK: load float*, float** + br label %for.body + +for.body: ; preds = %for.body, %for.body.preheader + %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] + %b.addr.02 = phi i64 [ %add.int, %for.body ], [ %b, %for.body.preheader ] + +; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] +; CHECK: %b.addr.02.ptr = phi float* [ %add, %for.body ], +; CHECK-NOT: %b.addr.02 = phi i64 + + %tmp = inttoptr i64 %b.addr.02 to float* +; CHECK-NOT: inttoptr i64 + %tmp1 = load float, float* %tmp, align 4 + %mul.i = fmul float %tmp1, 4.200000e+01 + store float %mul.i, float* %a.addr.03, align 4 + %add = getelementptr inbounds float, float* %tmp, i64 1 + %add.int = ptrtoint float* %add to i64 +; CHECK-NOT: ptrtoint float* + %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 + %cmp = icmp ult float* %incdec.ptr, %a_end + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @test1_neg(float* %a, float* 
readnone %a_end, i64* %b.i64) { +; CHECK-LABEL: @test1_neg +entry: + %cmp1 = icmp ult float* %a, %a_end + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %b = load i64, i64* %b.i64, align 8 + br label %for.body + +for.body: ; preds = %bb, %for.body.preheader + %a.addr.03 = phi float* [ %incdec.ptr, %bb ], [ %a, %for.body.preheader ] + %b.addr.02 = phi i64 [ %add.int, %bb ], [ %b, %for.body.preheader ] + +; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %bb ], [ %a, %for.body.preheader ] +; CHECK: %b.addr.02 = phi i64 + + %tmp = inttoptr i64 %b.addr.02 to float* +; CHECK: inttoptr i64 + %ptrcmp = icmp ult float* %tmp, %a_end + br i1 %ptrcmp, label %for.end, label %bb + +bb: + %tmp1 = load float, float* %a, align 4 + %mul.i = fmul float %tmp1, 4.200000e+01 + store float %mul.i, float* %a.addr.03, align 4 + %add = getelementptr inbounds float, float* %a, i64 1 + %add.int = ptrtoint float* %add to i64 +; CHECK: ptrtoint float* + %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 + %cmp = icmp ult float* %incdec.ptr, %a_end + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %bb, %for.body, %entry + ret void +} + + +define void @test2(float* %a, float* readnone %a_end, float** %b.float) { +; CHECK-LABEL: @test2 +entry: + %cmp1 = icmp ult float* %a, %a_end + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %b.i64 = bitcast float** %b.float to i64* + %b = load i64, i64* %b.i64, align 8 +; CHECK: load float*, float** + br label %for.body + +for.body: ; preds = %for.body, %for.body.preheader + %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] + %b.addr.02 = phi i64 [ %add.int, %for.body ], [ %b, %for.body.preheader ] + +; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] +; CHECK: %b.addr.02.ptr = phi float* [ %add, %for.body ], +; CHECK-NOT: %b.addr.02 = phi i64 + + %tmp = 
inttoptr i64 %b.addr.02 to float* +; CHECK-NOT: inttoptr i64 + %tmp1 = load float, float* %tmp, align 4 + %mul.i = fmul float %tmp1, 4.200000e+01 + store float %mul.i, float* %a.addr.03, align 4 + %add = getelementptr inbounds float, float* %tmp, i64 1 + %add.int = ptrtoint float* %add to i64 +; CHECK-NOT: ptrtoint float* + %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 + %cmp = icmp ult float* %incdec.ptr, %a_end + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +define void @test3(float* %a, float* readnone %a_end, i8** %b.i8p) { +; CHECK-LABEL: @test3 +entry: + %cmp1 = icmp ult float* %a, %a_end + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %b.i64 = bitcast i8** %b.i8p to i64* + %b = load i64, i64* %b.i64, align 8 +; CHECK: load float*, float** + br label %for.body + +for.body: ; preds = %for.body, %for.body.preheader + %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] + %b.addr.02 = phi i64 [ %add.int, %for.body ], [ %b, %for.body.preheader ] + +; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] +; CHECK: %b.addr.02.ptr = phi float* [ %add, %for.body ], +; CHECK-NOT: %b.addr.02 = phi i64 + + %tmp = inttoptr i64 %b.addr.02 to float* +; CHECK-NOT: inttoptr i64 + %tmp1 = load float, float* %tmp, align 4 + %mul.i = fmul float %tmp1, 4.200000e+01 + store float %mul.i, float* %a.addr.03, align 4 + %add = getelementptr inbounds float, float* %tmp, i64 1 + %add.int = ptrtoint float* %add to i64 +; CHECK-NOT: ptrtoint float* + %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 + %cmp = icmp ult float* %incdec.ptr, %a_end + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +define void @test4(float* %a, float* readnone %a_end, float** %b.float) { +entry: +; CHECK-LABEL: @test4 + %cmp1 = icmp ult float* %a, 
%a_end + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %b.f = load float*, float** %b.float, align 8 + %b = ptrtoint float* %b.f to i64 +; CHECK: load float*, float** +; CHECK-NOT: ptrtoint float* + br label %for.body + +for.body: ; preds = %for.body, %for.body.preheader + %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] + %b.addr.02 = phi i64 [ %add.int, %for.body ], [ %b, %for.body.preheader ] + %tmp = inttoptr i64 %b.addr.02 to float* +; CHECK-NOT: inttoptr i64 + %tmp1 = load float, float* %tmp, align 4 + %mul.i = fmul float %tmp1, 4.200000e+01 + store float %mul.i, float* %a.addr.03, align 4 + %add = getelementptr inbounds float, float* %tmp, i64 1 + %add.int = ptrtoint float* %add to i64 +; CHECK-NOT: ptrtoint float* + %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 + %cmp = icmp ult float* %incdec.ptr, %a_end + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} Index: test/Transforms/InstCombine/intptr2.ll =================================================================== --- test/Transforms/InstCombine/intptr2.ll +++ test/Transforms/InstCombine/intptr2.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +define void @test1(float* %a, float* readnone %a_end, i32* %b.i) { +; CHECK-LABEL: @test1 +entry: + %cmp1 = icmp ult float* %a, %a_end + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %b = ptrtoint i32 * %b.i to i64 +; CHECK: bitcast +; CHECK-NOT: ptrtoint + br label %for.body + +for.body: ; preds = %for.body, %for.body.preheader + %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] + %b.addr.02 = phi i64 [ %add.int, %for.body ], [ %b, %for.body.preheader ] + +; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] +; CHECK-NOT: phi i64 + + %tmp = inttoptr i64 %b.addr.02 to float* +; 
CHECK-NOT: inttoptr + %tmp1 = load float, float* %tmp, align 4 + %mul.i = fmul float %tmp1, 4.200000e+01 + store float %mul.i, float* %a.addr.03, align 4 + %add = getelementptr inbounds float, float* %tmp, i64 1 + %add.int = ptrtoint float* %add to i64 +; CHECK-NOT: ptrtoint + %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 + %cmp = icmp ult float* %incdec.ptr, %a_end + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + Index: test/Transforms/InstCombine/intptr3.ll =================================================================== --- test/Transforms/InstCombine/intptr3.ll +++ test/Transforms/InstCombine/intptr3.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + + +define void @test(float* %a, float* readnone %a_end, i64 %b) unnamed_addr { +entry: + %cmp1 = icmp ult float* %a, %a_end + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %b.float = inttoptr i64 %b to float* + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] + %b.addr.float = phi float* [ %b.addr.float.inc, %for.body ], [ %b.float, %for.body.preheader ] + %b.addr.i64 = phi i64 [ %b.addr.i64.inc, %for.body ], [ %b, %for.body.preheader ] +; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] +; CHECK-NEXT: %b.addr.float = phi float* [ %b.addr.float.inc, %for.body ], [ %b.float, %for.body.preheader ] +; CHECK-NEXT: = load float + %l = load float, float* %b.addr.float, align 4 + %mul.i = fmul float %l, 4.200000e+01 + store float %mul.i, float* %a.addr.03, align 4 + %b.addr.float.2 = inttoptr i64 %b.addr.i64 to float* +; CHECK-NOT: inttoptr + %b.addr.float.inc = getelementptr inbounds float, float* %b.addr.float.2, i64 1 + %b.addr.i64.inc = ptrtoint float* %b.addr.float.inc to i64 +; CHECK-NOT: ptrtoint + %incdec.ptr = getelementptr 
inbounds float, float* %a.addr.03, i64 1 + %cmp = icmp ult float* %incdec.ptr, %a_end + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + Index: test/Transforms/InstCombine/intptr4.ll =================================================================== --- test/Transforms/InstCombine/intptr4.ll +++ test/Transforms/InstCombine/intptr4.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + + +define void @test(float* %a, float* readnone %a_end, i64 %b, float* %bf) unnamed_addr { +entry: + %cmp1 = icmp ult float* %a, %a_end + %b.float = inttoptr i64 %b to float* + br i1 %cmp1, label %bb1, label %bb2 + +bb1: + br label %for.body.preheader +bb2: + %bfi = ptrtoint float* %bf to i64 + br label %for.body.preheader + +for.body.preheader: ; preds = %bb2, %bb1 + %b.phi = phi i64 [%b, %bb1], [%bfi, %bb2] +; CHECK-LABEL: for.body.preheader +; CHECK-NOT: %b.phi = phi i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body +; CHECK-LABEL: for.body + %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] + %b.addr.float = phi float* [ %b.addr.float.inc, %for.body ], [ %b.float, %for.body.preheader ] + %b.addr.i64 = phi i64 [ %b.addr.i64.inc, %for.body ], [ %b.phi, %for.body.preheader ] +; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] +; CHECK-NEXT: %b.addr.float = phi float* [ %b.addr.float.inc, %for.body ], [ %b.float, %for.body.preheader ] +; CHECK-NOT: = %b.addr.i64 + %l = load float, float* %b.addr.float, align 4 + %mul.i = fmul float %l, 4.200000e+01 + store float %mul.i, float* %a.addr.03, align 4 + %b.addr.float.2 = inttoptr i64 %b.addr.i64 to float* +; CHECK-NOT: inttoptr + %b.addr.float.inc = getelementptr inbounds float, float* %b.addr.float.2, i64 1 + %b.addr.i64.inc = ptrtoint float* %b.addr.float.inc to i64 +; CHECK-NOT: ptrtoint + %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 + %cmp = icmp ult 
float* %incdec.ptr, %a_end + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +