diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -720,7 +720,7 @@ Value *V = WorkList.pop_back_val(); if (isa(V) || isa(V)) { if (PtrValues.insert(V).second) - llvm::append_range(WorkList, V->users()); + append_range(WorkList, V->users()); } else if (StoreInst *Store = dyn_cast(V)) { Stores.push_back(Store); } else if (!isa(V)) { @@ -838,9 +838,23 @@ } } - // If this is a byval argument, and if the aggregate type is small, just - // pass the elements, which is always safe, if the passed value is densely - // packed or if we can prove the padding bytes are never accessed. + // If we can promote the pointer to its value. + SmallVector ArgParts; + if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) { + SmallVector Types; + for (const auto &Pair : ArgParts) + Types.push_back(Pair.second.Ty); + + if (areTypesABICompatible(Types, *F, TTI)) { + ArgsToPromote.insert({PtrArg, std::move(ArgParts)}); + continue; + } + } + + // Otherwise, if this is a byval argument, and if the aggregate type is + // small, just pass the elements, which is always safe, if the passed value + // is densely packed or if we can prove the padding bytes are never + // accessed. // // Only handle arguments with specified alignment; if it's unspecified, the // actual alignment of the argument is target-specific. @@ -849,43 +863,32 @@ ByValTy && PtrArg->getParamAlign() && (ArgumentPromotionPass::isDenselyPacked(ByValTy, DL) || !canPaddingBeAccessed(PtrArg)); - if (IsSafeToPromote) { - if (StructType *STy = dyn_cast(ByValTy)) { - if (MaxElements > 0 && STy->getNumElements() > MaxElements) { - LLVM_DEBUG(dbgs() << "ArgPromotion disables promoting argument '" - << PtrArg->getName() - << "' because it would require adding more" - << " than " << MaxElements - << " arguments to the function.\n"); - continue; - } - - SmallVector Types; - append_range(Types, STy->elements()); - - // If all the elements are single-value types, we can promote it. - bool AllSimple = - all_of(Types, [](Type *Ty) { return Ty->isSingleValueType(); }); - - // Safe to transform, don't even bother trying to "promote" it. - // Passing the elements as a scalar will allow sroa to hack on - // the new alloca we introduce. - if (AllSimple && areTypesABICompatible(Types, *F, TTI)) { - ByValArgsToTransform.insert(PtrArg); - continue; - } - } + if (!IsSafeToPromote) { + LLVM_DEBUG(dbgs() << "ArgPromotion disables passing the elements of" + << " the argument '" << PtrArg->getName() + << "' because it is not safe.\n"); + continue; } - - // Otherwise, see if we can promote the pointer to its value. - SmallVector ArgParts; - if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) { + if (StructType *STy = dyn_cast(ByValTy)) { + if (MaxElements > 0 && STy->getNumElements() > MaxElements) { + LLVM_DEBUG(dbgs() << "ArgPromotion disables passing the elements of" + << " the argument '" << PtrArg->getName() + << "' because it would require adding more" + << " than " << MaxElements + << " arguments to the function.\n"); + continue; + } SmallVector Types; - for (const auto &Pair : ArgParts) - Types.push_back(Pair.second.Ty); + append_range(Types, STy->elements()); - if (areTypesABICompatible(Types, *F, TTI)) - ArgsToPromote.insert({PtrArg, std::move(ArgParts)}); + // If all the elements are single-value types, we can promote it. + bool AllSimple = + all_of(Types, [](Type *Ty) { return Ty->isSingleValueType(); }); + + // Safe to transform. Passing the elements as a scalar will allow sroa to + // hack on the new alloca we introduce. + if (AllSimple && areTypesABICompatible(Types, *F, TTI)) + ByValArgsToTransform.insert(PtrArg); } } diff --git a/llvm/test/Transforms/ArgumentPromotion/byval-through-pointer-promotion.ll b/llvm/test/Transforms/ArgumentPromotion/byval-through-pointer-promotion.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/byval-through-pointer-promotion.ll @@ -0,0 +1,45 @@ +; RUN: opt -passes=argpromotion -S %s | FileCheck %s + +%struct.A = type { float, [12 x i8], i64, [8 x i8] } + +define internal float @callee(%struct.A* byval(%struct.A) align 32 %0) { +; CHECK-LABEL: define {{[^@]+}}@callee +; CHECK-SAME: (float [[ARG_0:%.*]], i64 [[ARG_1:%.*]]) { +; CHECK-NEXT: [[SUM:%.*]] = fadd float 0.000000e+00, [[ARG_0]] +; CHECK-NEXT: [[COEFF:%.*]] = uitofp i64 [[ARG_1]] to float +; CHECK-NEXT: [[RES:%.*]] = fmul float [[SUM]], [[COEFF]] +; CHECK-NEXT: ret float [[RES]] +; + %2 = getelementptr inbounds %struct.A, %struct.A* %0, i32 0, i32 0 + %3 = load float, float* %2, align 32 + %4 = fadd float 0.000000e+00, %3 + %5 = getelementptr inbounds %struct.A, %struct.A* %0, i32 0, i32 2 + %6 = load i64, i64* %5, align 16 + %7 = uitofp i64 %6 to float + %8 = fmul float %4, %7 + ret float %8 +} + +define float @caller(float %0) { +; CHECK-LABEL: define {{[^@]+}}@caller +; CHECK-SAME: (float [[ARG_0:%.*]]) { +; CHECK-NEXT: [[TMP_0:%.*]] = alloca %struct.A, align 32 +; CHECK-NEXT: [[FL_PTR_0:%.*]] = getelementptr inbounds %struct.A, %struct.A* [[TMP_0]], i32 0, i32 0 +; CHECK-NEXT: store float [[ARG_0]], float* [[FL_PTR_0]], align 32 +; CHECK-NEXT: [[I64_PTR_0:%.*]] = getelementptr inbounds %struct.A, %struct.A* [[TMP_0]], i32 0, i32 2 +; CHECK-NEXT: store i64 2, i64* [[I64_PTR_0]], align 16 +; CHECK-NEXT: [[FL_PTR_1:%.*]] = getelementptr %struct.A, %struct.A* [[TMP_0]], i64 0, i32 0 +; CHECK-NEXT: [[FL_VAL:%.*]] = load float, float* [[FL_PTR_1]], align 32 +; CHECK-NEXT: [[I64_PTR_1:%.*]] = getelementptr %struct.A, %struct.A* [[TMP_0]], i64 0, i32 2 +; CHECK-NEXT: [[I64_VAL:%.*]] = load i64, i64* [[I64_PTR_1]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call noundef float @callee(float [[FL_VAL]], i64 [[I64_VAL]]) +; CHECK-NEXT: ret float [[RES]] +; + %2 = alloca %struct.A, align 32 + %3 = getelementptr inbounds %struct.A, %struct.A* %2, i32 0, i32 0 + store float %0, float* %3, align 32 + %4 = getelementptr inbounds %struct.A, %struct.A* %2, i32 0, i32 2 + store i64 2, i64* %4, align 16 + %5 = call noundef float @callee(%struct.A* byval(%struct.A) align 32 %2) + ret float %5 +} diff --git a/llvm/test/Transforms/ArgumentPromotion/dbg.ll b/llvm/test/Transforms/ArgumentPromotion/dbg.ll --- a/llvm/test/Transforms/ArgumentPromotion/dbg.ll +++ b/llvm/test/Transforms/ArgumentPromotion/dbg.ll @@ -21,12 +21,18 @@ ; CHECK-LABEL: define {{[^@]+}}@test_byval ; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]]) { ; CHECK-NEXT: [[P:%.*]] = alloca [[STRUCT_PAIR:%.*]], align 4 -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 0 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 0 ; CHECK-NEXT: store i32 [[P_0]], i32* [[DOT0]], align 4 -; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 1 ; CHECK-NEXT: store i32 [[P_1]], i32* [[DOT1]], align 4 +; CHECK-NEXT: [[SINK:%.*]] = alloca i32*, align 8 +; CHECK-NEXT: [[DOT2:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 0 +; CHECK-NEXT: store i32* [[DOT2]], i32** [[SINK]], align 8 ; CHECK-NEXT: ret void ; + %1 = alloca i32*, align 8 + %2 = getelementptr %struct.pair, %struct.pair* %P, i32 0, i32 0 + store i32* %2, i32** %1, align 8 ; to protect from "usual" promotion ret void } diff --git a/llvm/test/Transforms/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/ArgumentPromotion/fp80.ll --- a/llvm/test/Transforms/ArgumentPromotion/fp80.ll +++ b/llvm/test/Transforms/ArgumentPromotion/fp80.ll @@ -53,13 +53,21 @@ ; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleSafely ; CHECK-SAME: (x86_fp80 [[ARG_0:%.*]]) { ; CHECK-NEXT: [[ARG:%.*]] = alloca [[UNION_U:%.*]], align 16 -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U]], %union.u* [[ARG]], i32 0, i32 0 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U]], [[UNION_U]]* [[ARG]], i32 0, i32 0 ; CHECK-NEXT: store x86_fp80 [[ARG_0]], x86_fp80* [[DOT0]], align 16 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], %union.u* [[ARG]], i64 0, i32 0 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], [[UNION_U]]* [[ARG]], i64 0, i32 0 +; CHECK-NEXT: [[IDX_P:%.*]] = alloca i64, align 8 +; CHECK-NEXT: store i64 0, i64* [[IDX_P]], align 8 +; CHECK-NEXT: [[IDX:%.*]] = load i64, i64* [[IDX_P]], align 8 +; CHECK-NEXT: [[GEP_IDX:%.*]] = getelementptr inbounds [[UNION_U]], [[UNION_U]]* [[ARG]], i64 [[IDX]], i32 0 ; CHECK-NEXT: [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]], align 16 ; CHECK-NEXT: ret x86_fp80 [[FP80]] ; %gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0 + %idx_slot = alloca i64, align 8 + store i64 0, i64* %idx_slot, align 8 + %idx = load i64, i64* %idx_slot, align 8 + %gep_idx = getelementptr inbounds %union.u, %union.u* %arg, i64 %idx, i32 0 ; to protect from "usual" promotion %fp80 = load x86_fp80, x86_fp80* %gep ret x86_fp80 %fp80 }