Index: lib/Transforms/InstCombine/InstCombinePHI.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -784,6 +784,59 @@
   return ReplaceInstUsesWith(FirstPhi, Undef);
 }
 
+// Fold a CastInst user back through a Phi. This only makes sense when all
+// the inputs to the Phi are either casts as well (between the same types as
+// the CastInst in question) or loads. In the load case, this transform
+// creates new casts, which can later be folded into the loads.
+static PHINode *foldCastIntoPHI(InstCombiner &IC, PHINode &PN, CastInst *CI) {
+  const DataLayout &DL = IC.getDataLayout();
+
+  if (!CI || !CI->isNoopCast(DL))
+    return nullptr;
+  Type *CastTy = CI->getDestTy();
+
+  // Check that the only incoming values are loads and matching no-op casts.
+  // There will be at least one of either, as the all-operands-equal case is
+  // handled earlier.
+  for (Use &In : PN.incoming_values()) {
+    // An incoming load must have exactly one use (the phi), otherwise we
+    // might loop infinitely.
+    if (isa<LoadInst>(In.get()) && In->hasOneUse())
+      continue;
+    // We can handle an incoming cast if it is a noop whose source type
+    // matches the destination type of CI.
+    if (auto *ArgCI = dyn_cast<CastInst>(In.get()))
+      if (ArgCI->isNoopCast(DL) && ArgCI->getSrcTy() == CastTy)
+        continue;
+    return nullptr;
+  }
+
+  PHINode *NewPN = PHINode::Create(CastTy, PN.getNumIncomingValues(),
+                                   PN.getName());
+  IC.InsertNewInstBefore(NewPN, PN);
+  // Populate the new phi.
+  for (Use &In : PN.incoming_values()) {
+    if (auto *LI = dyn_cast<LoadInst>(In)) {
+      // The old incoming load needs to be cast to the requested type.
+      auto *NewCI = CastInst::CreateBitOrPointerCast(LI, CastTy);
+      NewPN->addIncoming(NewCI, PN.getIncomingBlock(In));
+      NewCI->insertAfter(LI);
+      // Revisit the load later so the new cast gets folded into it.
+      IC.Worklist.Add(LI);
+    } else if (auto *ArgCI = dyn_cast<CastInst>(In)) {
+      // The old incoming cast can be skipped entirely.
+      NewPN->addIncoming(ArgCI->getOperand(0), PN.getIncomingBlock(In));
+    } else
+      llvm_unreachable("Not a CastInst or LoadInst");
+  }
+
+  // Plug in the new PHI.
+  IC.ReplaceInstUsesWith(*CI, NewPN);
+  IC.EraseInstFromFunction(*CI);
+
+  return &PN;
+}
+
 // PHINode simplification
 //
 Instruction *InstCombiner::visitPHINode(PHINode &PN) {
@@ -825,6 +878,13 @@
         PHIUser->user_back() == &PN) {
       return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
     }
+
+    // If the only user of a phi is a CastInst, we may be able to fold the
+    // cast back through the phi and simplify the code, provided the phi's
+    // only incoming values are loads or matching casts.
+    if (auto *CI = dyn_cast<CastInst>(PHIUser))
+      if (PHINode *R = foldCastIntoPHI(*this, PN, CI))
+        return R;
   }
 
   // We sometimes end up with phi cycles that non-obviously end up being the
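For reviewers, a before/after sketch of the IR shape that test1 below
exercises (hand-written for illustration, not verbatim opt output; the
unnamed value %1 is illustrative):

  ; Before: the phi carries i64; its only user is a no-op inttoptr, and its
  ; incoming values are a single-use i64 load and a no-op ptrtoint.
  entry:
    %0 = load i64, i64* %canon
    br label %for.cond
  for.cond:
    %e_canon = phi i64 [ %0, %entry ], [ %incdec_canon, %for.body ]
    %e.0 = inttoptr i64 %e_canon to %struct.Point*

  ; After foldCastIntoPHI plus the follow-up load fold: the phi carries the
  ; pointer type directly, the entry value is a pointer-typed load, and the
  ; ptrtoint on the back-edge is bypassed.
  entry:
    %1 = load %struct.Point*, %struct.Point** %point_array
    br label %for.cond
  for.cond:
    %e.0 = phi %struct.Point* [ %1, %entry ], [ %incdec.ptr, %for.body ]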
Index: test/Transforms/InstCombine/fold-phi-cast-load.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/fold-phi-cast-load.ll
@@ -0,0 +1,60 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Point = type { i32, i32 }
+declare void @something(%struct.Point*)
+
+; All casts here are no-ops on this target, so the i64 phi gets folded.
+define void @test1(%struct.Point** %point_array, %struct.Point* %end) {
+entry:
+  %arrayidx = getelementptr inbounds %struct.Point*, %struct.Point** %point_array, i64 0
+  %canon = bitcast %struct.Point** %arrayidx to i64*
+  %0 = load i64, i64* %canon
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %e_canon = phi i64 [ %0, %entry ], [ %incdec_canon, %for.body ]
+  %e.0 = inttoptr i64 %e_canon to %struct.Point*
+  %cmp = icmp ne %struct.Point* %e.0, %end
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  call void @something(%struct.Point* %e.0)
+  %incdec.ptr = getelementptr inbounds %struct.Point, %struct.Point* %e.0, i32 1
+  %incdec_canon = ptrtoint %struct.Point* %incdec.ptr to i64
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+; CHECK-LABEL: @test1(
+; CHECK: load %struct.Point*
+; CHECK: phi %struct.Point*
+
+; inttoptr from i32 is not a no-op with 64-bit pointers; leave the phi alone.
+define void @test2(%struct.Point** %point_array, %struct.Point* %end) {
+entry:
+  %arrayidx = getelementptr inbounds %struct.Point*, %struct.Point** %point_array, i64 0
+  %canon = bitcast %struct.Point** %arrayidx to i32*
+  %0 = load i32, i32* %canon
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %e_canon = phi i32 [ %0, %entry ], [ %incdec_canon, %for.body ]
+  %e.0 = inttoptr i32 %e_canon to %struct.Point*
+  %cmp = icmp ne %struct.Point* %e.0, %end
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  call void @something(%struct.Point* %e.0)
+  %incdec.ptr = getelementptr inbounds %struct.Point, %struct.Point* %e.0, i32 1
+  %incdec_canon = ptrtoint %struct.Point* %incdec.ptr to i32
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+; CHECK-LABEL: @test2(
+; CHECK: load i32
+; CHECK: phi i32
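To exercise just this test locally, it can be run through lit in the usual
way (the build directory path is illustrative):

  ./build/bin/llvm-lit -v test/Transforms/InstCombine/fold-phi-cast-load.ll

or by invoking the RUN line directly with opt and FileCheck on the file.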