diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -67,6 +67,7 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include <algorithm>
 #include <cassert>
@@ -220,6 +221,10 @@
   // pass in the loaded pointers.
   SmallVector<Value *, 16> Args;
   const DataLayout &DL = F->getParent()->getDataLayout();
+  // Call-site operands that were passed for promoted arguments with no uses
+  // in F; collected here and cleaned up after every call has been rewritten.
+  SmallVector<WeakTrackingVH, 16> DeadArgs;
+
   while (!F->use_empty()) {
     CallBase &CB = cast<CallBase>(*F->user_back());
     assert(CB.getCalledFunction() == F);
@@ -255,6 +260,9 @@
           Args.push_back(LI);
           ArgAttrVec.push_back(AttributeSet());
         }
+      } else { // ArgsToPromote.count(&*I) && I->use_empty()
+        assert(ArgsToPromote.count(&*I) && I->use_empty());
+        DeadArgs.emplace_back(AI->get());
       }
     }
 
@@ -297,6 +305,10 @@
     CB.eraseFromParent();
   }
 
+  // The rewritten calls no longer pass the operands of the dropped arguments,
+  // so delete any of them that have become trivially dead in the callers.
+  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadArgs);
+
   // Since we have now created the new function, splice the body of the old
   // function right into the new function, leaving the old rotting hulk of the
   // function empty.
diff --git a/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll b/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --version 2
+; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
+
+; Check that the instructions feeding a dead promoted argument are deleted at
+; the call sites, so the now-unused argument of the caller (and the alloca in
+; its own caller) can be removed as well.
+
+%ptr.struct = type { ptr, ptr, ptr }
+
+define internal void @child(ptr %this, ptr %y, ptr %x) {
+; CHECK-LABEL: define {{[^@]+}}@child
+; CHECK-SAME: (ptr [[Y:%.*]], half [[X_0_VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store half [[X_0_VAL]], ptr [[Y]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load half, ptr %x
+  store half %0, ptr %y
+  ret void
+}
+
+define internal void @parent(ptr %this, ptr %p1, ptr %p2) {
+; CHECK-LABEL: define {{[^@]+}}@parent
+; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P2_VAL2:%.*]] = load half, ptr [[P2]], align 2
+; CHECK-NEXT:    call void @child(ptr [[P1]], half [[P2_VAL2]])
+; CHECK-NEXT:    [[P2_VAL1:%.*]] = load half, ptr [[P2]], align 2
+; CHECK-NEXT:    call void @child(ptr [[P1]], half [[P2_VAL1]])
+; CHECK-NEXT:    [[P2_VAL:%.*]] = load half, ptr [[P2]], align 2
+; CHECK-NEXT:    call void @child(ptr [[P1]], half [[P2_VAL]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src_element_op_0 = getelementptr ptr, ptr %this, i64 0
+  %load0 = load ptr, ptr %src_element_op_0
+  call void @child(ptr %load0, ptr %p1, ptr %p2)
+  %src_element_op_1 = getelementptr ptr, ptr %this, i64 1
+  %load1 = load ptr, ptr %src_element_op_1
+  call void @child(ptr %load1, ptr %p1, ptr %p2)
+  %src_element_op_2 = getelementptr ptr, ptr %this, i64 2
+  %load2 = load ptr, ptr %src_element_op_2
+  call void @child(ptr %load2, ptr %p1, ptr %p2)
+  ret void
+}
+
+define void @grandparent() {
+; CHECK-LABEL: define {{[^@]+}}@grandparent() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[XPTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[YPTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @parent(ptr [[XPTR]], ptr [[YPTR]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %f = alloca %ptr.struct
+  %xptr = alloca i32
+  %yptr = alloca i32
+  call void @parent(ptr %f, ptr %xptr, ptr %yptr)
+  ret void
+}
+
diff --git a/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll b/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll
--- a/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll
@@ -83,7 +83,6 @@
 ; CHECK-NEXT:    call void @g(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]]
 ; CHECK-NEXT:    call void @h(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]]
 ; CHECK-NEXT:    call void @k(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]]
-; CHECK-NEXT:    [[S_VAL:%.*]] = load ptr, ptr [[S]], align 8
 ; CHECK-NEXT:    call void @l() #[[ATTR0]]
 ; CHECK-NEXT:    ret i32 0
 ;
diff --git a/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll b/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
+; RUN: opt -O3 -S < %s | FileCheck %s
+
+; Arg promotion eliminates the struct argument, and eliminates dead arguments, but introduces and leaves dead loads of the eliminated dead arg in callers
+
+%struct.ss = type { ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
+
+define internal void @phantomLoad(ptr %p, ptr %y, ptr %x) {
+entry:
+  %0 = load i32, ptr %x
+  store i32 %0, ptr %y
+  ret void
+}
+
+define ptr @parent(ptr align 8 dereferenceable(72) %f, i16 %val1, i16 %val2, i32 %val3) align 2 {
+; CHECK-LABEL: define {{[^@]+}}@parent
+; CHECK-SAME: (ptr readonly returned align 8 dereferenceable(72) [[F:%.*]], i16 [[VAL1:%.*]], i16 [[VAL2:%.*]], i32 [[VAL3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[F]], i64 64
+; CHECK-NEXT:    [[F_VAL:%.*]] = load ptr, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[CMP_NOT_NOT_I:%.*]] = icmp eq i32 [[VAL3]], 0
+; CHECK-NEXT:    [[SPEC_SELECT_I:%.*]] = select i1 [[CMP_NOT_NOT_I]], i16 [[VAL1]], i16 [[VAL2]]
+; CHECK-NEXT:    [[SPEC_SELECT2_I:%.*]] = select i1 [[CMP_NOT_NOT_I]], i16 [[VAL2]], i16 [[VAL1]]
+; CHECK-NEXT:    store i16 [[SPEC_SELECT_I]], ptr [[F_VAL]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[F_VAL]], i64 16
+; CHECK-NEXT:    store i16 [[SPEC_SELECT2_I]], ptr [[TMP1]], align 2
+; CHECK-NEXT:    ret ptr [[F]]
+;
+entry:
+  call void @badChild(ptr align 8 dereferenceable(72) %f, i16 %val1, i16 %val2, i32 %val3) #4
+  ret ptr %f
+}
+
+define internal void @badChild(ptr align 8 dereferenceable(72) %this, i16 %val1, i16 %val2, i32 %val3) align 2 {
+entry:
+  %othergep = getelementptr inbounds %struct.ss, ptr %this, i64 0, i32 2
+  %load0 = load ptr, ptr %othergep, align 8
+  %load2 = load ptr, ptr %this
+  %x = alloca i32
+  %y = alloca i32
+  call void @phantomLoad(ptr %load0, ptr %x, ptr %y)
+  call void @phantomLoad(ptr %load2, ptr %x, ptr %y)
+  %cmp.not.not = icmp eq i32 %val3, 0
+  br i1 %cmp.not.not, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %0 = getelementptr inbounds %struct.ss, ptr %this, i64 0, i32 8
+  %1 = load ptr, ptr %0, align 8
+  store i16 %val1, ptr %1, align 2
+  %add.ptr.i.i.i.i = getelementptr inbounds i8, ptr %1, i64 16
+  store i16 %val2, ptr %add.ptr.i.i.i.i, align 2
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %2 = getelementptr inbounds %struct.ss, ptr %this, i64 0, i32 8
+  %3 = load ptr, ptr %2, align 8
+  %add.ptr.i.i.i.i7 = getelementptr inbounds i8, ptr %3, i64 16
+  store i16 %val1, ptr %add.ptr.i.i.i.i7, align 2
+  store i16 %val2, ptr %3, align 2
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+