Index: lib/Transforms/Scalar/GVN.cpp
===================================================================
--- lib/Transforms/Scalar/GVN.cpp
+++ lib/Transforms/Scalar/GVN.cpp
@@ -744,14 +744,33 @@
 static Value *ConstructSSAForLoadSet(LoadInst *LI,
                          SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                                      GVN &gvn) {
-  // Check for the fully redundant, dominating load case.  In this case, we can
-  // just use the dominating value directly.
-  if (ValuesPerBlock.size() == 1 &&
-      gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
+  // Check if we have a fully redundant load by checking if we have only one
+  // available value, not counting the load we're going to be eliminating.
+  const AvailableValueInBlock *SingleValue = nullptr;
+  for (const AvailableValueInBlock &AV : ValuesPerBlock) {
+    // Discount this available value if it's the load we're eliminating.
+    if ((AV.AV.isSimpleValue() && AV.AV.getSimpleValue() == LI) ||
+        (AV.AV.isCoercedLoadValue() && AV.AV.getCoercedLoadValue() == LI)) {
+      continue;
+    }
+    // If we've already seen a value then we have more than one, so we don't
+    // have a single value.
+    if (SingleValue) {
+      SingleValue = nullptr;
+      break;
+    }
+    // We haven't seen any available values before this, so if we have a single
+    // value this is it.
+    SingleValue = &AV;
+  }
+  // If we have a fully redundant load, check if it dominates LI; in that case
+  // we can just use the dominating value directly.
+  if (SingleValue &&
+      gvn.getDominatorTree().properlyDominates(SingleValue->BB,
                                                LI->getParent())) {
-    assert(!ValuesPerBlock[0].AV.isUndefValue() &&
+    assert(!SingleValue->AV.isUndefValue() &&
            "Dead BB dominate this block");
-    return ValuesPerBlock[0].MaterializeAdjustedValue(LI, gvn);
+    return SingleValue->MaterializeAdjustedValue(LI, gvn);
   }
 
   // Otherwise, we have to construct SSA form.
Index: test/Transforms/GVN/pre-after-rle.ll
===================================================================
--- /dev/null
+++ test/Transforms/GVN/pre-after-rle.ll
@@ -0,0 +1,36 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+declare noalias i8* @malloc(i64)
+
+; Detecting that %s is fully redundant should let us detect that %w is partially
+; redundant.
+define void @fn(i32** noalias %start, i32* %width, i32 %h) {
+entry:
+  %call = tail call noalias i8* @malloc(i64 1024)
+  %call.cast = bitcast i8* %call to i32*
+  store i32* %call.cast, i32** %start, align 8
+  br label %preheader
+
+preheader:
+  %cmp = icmp slt i32 1, %h
+  br i1 %cmp, label %body, label %exit
+
+; CHECK-LABEL: preheader.body_crit_edge:
+; CHECK: load i32, i32* %width, align 8
+
+; CHECK-LABEL: body:
+; CHECK-NOT: load i32*, i32** %start, align 8
+; CHECK-NOT: load i32, i32* %width, align 8
+body:
+  %j = phi i32 [ 0, %preheader ], [ %j.next, %body ]
+  %s = load i32*, i32** %start, align 8
+  %idx = getelementptr inbounds i32, i32* %s, i64 0
+  store i32 0, i32* %idx, align 4
+  %j.next = add nuw nsw i32 %j, 1
+  %w = load i32, i32* %width, align 8
+  %cmp3 = icmp slt i32 %j.next, %w
+  br i1 %cmp3, label %body, label %preheader
+
+exit:
+  ret void
+}
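
Note on the GVN.cpp hunk: the new logic is essentially a "find the unique available value, ignoring the load being eliminated" scan. The standalone C++ sketch below mirrors that pattern outside of LLVM; Avail, findSingleValue, and the integer producer ids are illustrative stand-ins for AvailableValueInBlock, the loop added to ConstructSSAForLoadSet, and the load LI, not LLVM APIs.

// Sketch of the scan: return the single surviving entry, or nullptr if
// there are zero or more than one once the eliminated load is discounted.
#include <cstdio>
#include <vector>

struct Avail {
  int Producer; // stand-in for the instruction providing the value
  int Value;    // stand-in for the value it makes available
};

static const Avail *findSingleValue(const std::vector<Avail> &Values,
                                    int LoadBeingEliminated) {
  const Avail *Single = nullptr;
  for (const Avail &A : Values) {
    // Discount the entry produced by the load we are eliminating.
    if (A.Producer == LoadBeingEliminated)
      continue;
    // A second surviving entry means there is no single value.
    if (Single)
      return nullptr;
    Single = &A;
  }
  return Single;
}

int main() {
  // Two entries, but one is the load being eliminated (producer id 7),
  // so the remaining entry is treated as the single available value.
  std::vector<Avail> Values = {{7, 0}, {3, 42}};
  if (const Avail *S = findSingleValue(Values, 7))
    std::printf("single available value: %d\n", S->Value);
}

The effect, which the new pre-after-rle.ll test exercises, is that a load can still be recognized as fully redundant when ValuesPerBlock also carries an entry for the load itself, a case the previous ValuesPerBlock.size() == 1 check rejected; once %s is eliminated, %w becomes eligible for PRE along the preheader.body_crit_edge block.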