diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -220,6 +220,8 @@ // pass in the loaded pointers. SmallVector Args; const DataLayout &DL = F->getParent()->getDataLayout(); + SmallVector DeadArgs; + while (!F->use_empty()) { CallBase &CB = cast(*F->user_back()); assert(CB.getCalledFunction() == F); @@ -255,6 +257,29 @@ Args.push_back(LI); ArgAttrVec.push_back(AttributeSet()); } + } else { // ArgsToPromote.count(&*I) && I->use_empty() + assert(ArgsToPromote.count(&*I) && I->use_empty()); + auto Op = AI->get(); + // Op is dead if its only use is this call: once removed from the CallBase + // it has no other use. NOTE(review): OnlyUse may be a Use, not its user. + bool IsDead = Op->use_empty(); + if (Op->hasOneUse()) { + auto &OnlyUse = *Op->use_begin(); + IsDead |= (OnlyUse == Op); + // Be sure the use is the dead callbase (&CB) + if (!IsDead && isa(OnlyUse)) + IsDead |= cast(OnlyUse) == &CB; + } + if (IsDead) { + if (isa(Op)) { + Instruction *DeadInst = dyn_cast(Op); + // We may visit a callsite more than once; only add the arg once. + if (std::find(DeadArgs.begin(), DeadArgs.end(), DeadInst) == + DeadArgs.end()) { + DeadArgs.push_back(DeadInst); + } + } + } } } @@ -297,6 +322,35 @@ CB.eraseFromParent(); } + for (auto &DeadInst : DeadArgs) { + // Promotion may result in creating dead loads. We may hoist a load, + // then optimize out the uses of the new argument in the body of the callee. + // In a subsequent invocation, we remove the now dead argument from the + // call base, resulting in a phantom/dead load. + // The dead "instruction" may actually be a chain of gep-load. 
+ if (isa(DeadInst) || isa(DeadInst)) { + auto Op = DeadInst->getOperand(0); + if (Op->hasOneUse() && + (isa(Op) || isa(Op))) { + auto &OnlyUse = *Op->use_begin(); + bool IsDead = OnlyUse == Op; + // Be sure the use is actually the dead load (DeadInst) + if (!IsDead && isa(OnlyUse)) { + IsDead |= dyn_cast(OnlyUse) == DeadInst; + } + + if (IsDead) { + auto OtherDeadInst = dyn_cast(Op); + OtherDeadInst->replaceAllUsesWith( + PoisonValue::get(OtherDeadInst->getType())); + OtherDeadInst->eraseFromParent(); + } + } + DeadInst->replaceAllUsesWith(PoisonValue::get(DeadInst->getType())); + DeadInst->eraseFromParent(); + } + } + // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. diff --git a/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll b/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s + +%ptr.struct = type { ptr, ptr, ptr } + +define internal void @child(ptr %this, ptr %y, ptr %x) nounwind { +; CHECK-LABEL: @child( +; CHECK-NEXT: entry: +; CHECK-NEXT: store half [[X_0_VAL:%.*]], ptr [[Y:%.*]], align 2 +; CHECK-NEXT: ret void +; +entry: + %0 = load half, ptr %x + store half %0, ptr %y + ret void +} + +define internal void @parent(ptr %this, ptr %p1, ptr %p2) nounwind { +; CHECK-LABEL: @parent( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P2_VAL2:%.*]] = load half, ptr [[P2:%.*]], align 2 +; CHECK-NEXT: call fastcc void @child(ptr [[P1:%.*]], half [[P2_VAL2]]) +; CHECK-NEXT: [[P2_VAL1:%.*]] = load half, ptr [[P2]], align 2 +; CHECK-NEXT: call fastcc void @child(ptr [[P1]], half [[P2_VAL1]]) +; CHECK-NEXT: [[P2_VAL:%.*]] = load half, ptr [[P2]], align 2 +; CHECK-NEXT: call fastcc 
void @child(ptr [[P1]], half [[P2_VAL]]) +; CHECK-NEXT: ret void +; +entry: + %src_element_op_0 = getelementptr ptr, ptr %this, i64 0 + %load0 = load ptr, ptr %src_element_op_0 + call fastcc void @child(ptr %load0, ptr %p1, ptr %p2) + %src_element_op_1 = getelementptr ptr, ptr %this, i64 1 + %load1 = load ptr, ptr %src_element_op_1 + call fastcc void @child(ptr %load1, ptr %p1, ptr %p2) + %src_element_op_2 = getelementptr ptr, ptr %this, i64 2 + %load2 = load ptr, ptr %src_element_op_2 + call fastcc void @child(ptr %load2, ptr %p1, ptr %p2) + ret void +} + +define void @grandparent() nounwind { +; CHECK-LABEL: @grandparent( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[F:%.*]] = alloca [[PTR_STRUCT:%.*]], align 8 +; CHECK-NEXT: [[XPTR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[YPTR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call fastcc void @parent(ptr [[XPTR]], ptr [[YPTR]]) +; CHECK-NEXT: ret void +; +entry: + %f = alloca %ptr.struct + %xptr = alloca i32 + %yptr = alloca i32 + call fastcc void @parent(ptr %f, ptr %xptr, ptr %yptr) + ret void +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll b/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll --- a/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll +++ b/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll @@ -83,7 +83,6 @@ ; CHECK-NEXT: call void @g(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]] ; CHECK-NEXT: call void @h(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]] ; CHECK-NEXT: call void @k(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]] -; CHECK-NEXT: [[S_VAL:%.*]] = load ptr, ptr [[S]], align 8 ; CHECK-NEXT: call void @l() #[[ATTR0]] ; CHECK-NEXT: ret i32 0 ;