diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1164,15 +1164,30 @@ // Do PHI translation to get its value in the predecessor if necessary. The // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. + // We do the translation for each edge we skipped by going from LI's block + // to LoadBB, otherwise we might miss pieces needing translation. // If all preds have a single successor, then we know it is safe to insert // the load on the pred (?!?), so we can insert code to materialize the // pointer if it is not available. - PHITransAddr Address(LI->getPointerOperand(), DL, AC); - Value *LoadPtr = nullptr; - LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, - *DT, NewInsts); + Value *LoadPtr = LI->getPointerOperand(); + BasicBlock *Cur = LI->getParent(); + while (Cur != LoadBB) { + PHITransAddr Address(LoadPtr, DL, AC); + LoadPtr = Address.PHITranslateWithInsertion( + Cur, Cur->getSinglePredecessor(), *DT, NewInsts); + if (!LoadPtr) { + CanDoPRE = false; + break; + } + Cur = Cur->getSinglePredecessor(); + } + if (LoadPtr) { + PHITransAddr Address(LoadPtr, DL, AC); + LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, *DT, + NewInsts); + } // If we couldn't find or insert a computation of this phi translated value, // we fail PRE. if (!LoadPtr) { @@ -1187,8 +1202,12 @@ if (!CanDoPRE) { while (!NewInsts.empty()) { - Instruction *I = NewInsts.pop_back_val(); - markInstructionForDeletion(I); + // Erase instructions generated by the failed PHI translation before + // trying to number them. PHI translation might insert instructions + // in basic blocks other than the current one, and we delete them + // directly, as markInstructionForDeletion only allows removing from the + // current basic block. + NewInsts.pop_back_val()->eraseFromParent(); } // HINT: Don't revert the edge-splitting as following transformation may // also need to split these critical edges. diff --git a/llvm/test/Transforms/GVN/PRE/rle.ll b/llvm/test/Transforms/GVN/PRE/rle.ll --- a/llvm/test/Transforms/GVN/PRE/rle.ll +++ b/llvm/test/Transforms/GVN/PRE/rle.ll @@ -546,6 +546,130 @@ ret i8 %R } +declare void @use_i32(i32) readonly + +; indirectbr currently prevents MergeBlockIntoPredecessor from merging latch +; into header. Make sure we translate the address for %l1 correctly where +; parts of the address computations are in different basic blocks. +define i32 @phi_trans6(i32* noalias nocapture readonly %x, i1 %cond) { +; CHECK-LABEL: define i32 @phi_trans6( +; CHECK-LABEL: entry: +; CHECK-NEXT: %l0 = load i32, i32* %x +; +; CHECK-LABEL: header: +; CHECK-NEXT: %l1 = phi i32 [ %l0, %entry ], [ %l1.pre, %latch.header_crit_edge ] +; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %latch.header_crit_edge ] +; CHECK-NEXT: indirectbr i8* blockaddress(@phi_trans6, %latch), [label %latch] +; +; CHECK-LABEL: latch: +; CHECK-NEXT: %iv.next = add i32 %iv, 1 +; CHECK-NEXT: br i1 %cond, label %exit, label %latch.header_crit_edge +; +; CHECK-LABEL: latch.header_crit_edge: +; CHECK-NEXT: %gep.1.phi.trans.insert.phi.trans.insert = getelementptr i32, i32* %x, i32 %iv.next +; CHECK-NEXT: %l1.pre = load i32, i32* %gep.1.phi.trans.insert.phi.trans.insert +; CHECK-LABEL: br label %header +; +entry: + %l0 = load i32, i32* %x + call void @use_i32(i32 %l0) + br label %header + +header: + %iv = phi i32 [0, %entry], [ %iv.next, %latch] + indirectbr i8* blockaddress(@phi_trans6, %latch), [label %latch] + +latch: + %gep.1 = getelementptr i32, i32* %x, i32 %iv + %l1 = load i32, i32* %gep.1 + %iv.next = add i32 %iv, 1 + br i1 %cond, label %exit, label %header + +exit: + ret i32 %l1 +} + +; FIXME: Currently we fail to translate the PHI in this case. +define i32 @phi_trans7(i32* noalias nocapture readonly %x, i1 %cond) { +; CHECK-LABEL: define i32 @phi_trans7( +; CHECK-LABEL: entry: +; CHECK-NEXT: %l0 = load i32, i32* %x +; +; CHECK-LABEL: header: +; CHECK-NEXT: %iv = phi i32 [ 2, %entry ], [ %iv.next, %latch.header_crit_edge ] +; CHECK-NEXT: %offset = add i32 %iv, -2 +; CHECK-NEXT: indirectbr i8* blockaddress(@phi_trans7, %latch), [label %latch] +; +; CHECK-LABEL: latch: +; CHECK-NEXT: %gep.1 = getelementptr i32, i32* %x, i32 %offset +; CHECK-NEXT: %l1 = load i32, i32* %gep.1 +; CHECK-NEXT: %iv.next = add i32 %iv, 1 +; CHECK-NEXT: br i1 %cond, label %exit, label %latch.header_crit_edge +; +; CHECK-LABEL: latch.header_crit_edge: +; CHECK-LABEL: br label %header +; +entry: + %l0 = load i32, i32* %x + call void @use_i32(i32 %l0) + br label %header + +header: + %iv = phi i32 [2, %entry], [ %iv.next, %latch] + %offset = add i32 %iv, -2 + indirectbr i8* blockaddress(@phi_trans7, %latch), [label %latch] + +latch: + %gep.1 = getelementptr i32, i32* %x, i32 %offset + %l1 = load i32, i32* %gep.1 + %iv.next = add i32 %iv, 1 + br i1 %cond, label %exit, label %header + +exit: + ret i32 %l1 +} + +; FIXME: Currently we fail to translate the PHI in this case. +define i32 @phi_trans8(i32* noalias nocapture readonly %x, i1 %cond) { +; CHECK-LABEL: define i32 @phi_trans8( +; CHECK-LABEL: entry: +; CHECK-NEXT: %l0 = load i32, i32* %x +; +; CHECK-LABEL: header: +; CHECK-NEXT: %iv = phi i32 [ 2, %entry ], [ %iv.next, %latch.header_crit_edge ] +; CHECK-NEXT: indirectbr i8* blockaddress(@phi_trans8, %latch), [label %latch] +; +; CHECK-LABEL: latch: +; CHECK-NEXT: %offset = add i32 %iv, -2 +; CHECK-NEXT: %gep.1 = getelementptr i32, i32* %x, i32 %offset +; CHECK-NEXT: %l1 = load i32, i32* %gep.1 +; CHECK-NEXT: %iv.next = add i32 %iv, 1 +; CHECK-NEXT: br i1 %cond, label %exit, label %latch.header_crit_edge +; +; CHECK-LABEL: latch.header_crit_edge: +; CHECK-LABEL: br label %header +; +entry: + %l0 = load i32, i32* %x + call void @use_i32(i32 %l0) + br label %header + +header: + %iv = phi i32 [2, %entry], [ %iv.next, %latch] + indirectbr i8* blockaddress(@phi_trans8, %latch), [label %latch] + +latch: + %offset = add i32 %iv, -2 + %gep.1 = getelementptr i32, i32* %x, i32 %offset + %l1 = load i32, i32* %gep.1 + %iv.next = add i32 %iv, 1 + br i1 %cond, label %exit, label %header + +exit: + ret i32 %l1 +} + + ; PR6642 define i32 @memset_to_load() nounwind readnone { @@ -661,6 +785,7 @@ ; CHECK: ret i32 } + declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind