diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -86,7 +86,6 @@ #define DEBUG_TYPE "argpromotion" STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted"); -STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted"); STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated"); namespace { @@ -156,7 +155,6 @@ static Function *doPromotion( Function *F, const DenseMap> &ArgsToPromote, - SmallPtrSetImpl &ByValArgsToTransform, Optional> ReplaceCallSite) { // Start by computing a new prototype for the function, which is the same as @@ -174,15 +172,7 @@ unsigned ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgNo) { - if (ByValArgsToTransform.count(&*I)) { - // Simple byval argument? Just add all the struct element types. - Type *AgTy = I->getParamByValType(); - StructType *STy = cast(AgTy); - llvm::append_range(Params, STy->elements()); - ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(), - AttributeSet()); - ++NumByValArgsPromoted; - } else if (!ArgsToPromote.count(&*I)) { + if (!ArgsToPromote.count(&*I)) { // Unchanged argument Params.push_back(I->getType()); ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo)); @@ -251,28 +241,9 @@ ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++AI, ++ArgNo) - if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { + if (!ArgsToPromote.count(&*I)) { Args.push_back(*AI); // Unmodified argument ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo)); - } else if (ByValArgsToTransform.count(&*I)) { - // Emit a GEP and load for each element of the struct. 
- Type *AgTy = I->getParamByValType(); - StructType *STy = cast(AgTy); - Value *Idxs[2] = { - ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr}; - const StructLayout *SL = DL.getStructLayout(STy); - Align StructAlign = *I->getParamAlign(); - for (unsigned J = 0, Elems = STy->getNumElements(); J != Elems; ++J) { - Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), J); - auto *Idx = - IRB.CreateGEP(STy, *AI, Idxs, (*AI)->getName() + "." + Twine(J)); - // TODO: Tell AA about the new values? - Align Alignment = - commonAlignment(StructAlign, SL->getElementOffset(J)); - Args.push_back(IRB.CreateAlignedLoad( - STy->getElementType(J), Idx, Alignment, Idx->getName() + ".val")); - ArgAttrVec.push_back(AttributeSet()); - } } else if (!I->use_empty()) { Value *V = *AI; const auto &ArgParts = ArgsToPromote.find(&*I)->second; @@ -346,7 +317,7 @@ // the new arguments, also transferring over the names as well. Function::arg_iterator I2 = NF->arg_begin(); for (Argument &Arg : F->args()) { - if (!ArgsToPromote.count(&Arg) && !ByValArgsToTransform.count(&Arg)) { + if (!ArgsToPromote.count(&Arg)) { // If this is an unmodified argument, move the name and users over to the // new version. Arg.replaceAllUsesWith(&*I2); @@ -355,37 +326,6 @@ continue; } - if (ByValArgsToTransform.count(&Arg)) { - // In the callee, we create an alloca, and store each of the new incoming - // arguments into the alloca. - Instruction *InsertPt = &NF->begin()->front(); - - // Just add all the struct element types. 
- Type *AgTy = Arg.getParamByValType(); - Align StructAlign = *Arg.getParamAlign(); - Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr, - StructAlign, "", InsertPt); - StructType *STy = cast(AgTy); - Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), - nullptr}; - const StructLayout *SL = DL.getStructLayout(STy); - - for (unsigned J = 0, Elems = STy->getNumElements(); J != Elems; ++J) { - Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), J); - Value *Idx = GetElementPtrInst::Create( - AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(J), - InsertPt); - I2->setName(Arg.getName() + "." + Twine(J)); - Align Alignment = commonAlignment(StructAlign, SL->getElementOffset(J)); - new StoreInst(&*I2++, Idx, false, Alignment, InsertPt); - } - - // Anything that used the arg should now use the alloca. - Arg.replaceAllUsesWith(TheAlloca); - TheAlloca->takeName(&Arg); - continue; - } - // There potentially are metadata uses for things like llvm.dbg.value. // Replace them with undef, after handling the other regular uses. auto RauwUndefMetadata = make_scope_exit( @@ -402,8 +342,8 @@ } // Otherwise, if we promoted this argument, then all users are load - // instructions (with possible casts and GEPs in between). - + // instructions (with possible casts and GEPs in between) or store ones if + // the byval attribute is used. SmallVector Worklist; SmallVector DeadInsts; append_range(Worklist, Arg.users()); @@ -427,6 +367,16 @@ continue; } + if (auto *SI = dyn_cast(V)) { + // Stores are only allowed for byval arguments. It is the job of the + // findArgParts function to decide whether arguments with stores as + // users are eligible for promotion. If a store is an allowed user, it + // writes to a temporary (created explicitly or implicitly with the + // byval attribute), so the instruction can be removed too. 
+ DeadInsts.push_back(SI); + continue; + } + llvm_unreachable("Unexpected user"); } @@ -456,8 +406,8 @@ // direct callees. return all_of(Callee->users(), [&](User *U) { CallBase &CB = cast(*U); - return isDereferenceableAndAlignedPointer( - CB.getArgOperand(Arg->getArgNo()), NeededAlign, Bytes, DL); + return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()), + NeededAlign, Bytes, DL); }); } @@ -465,6 +415,7 @@ /// parts it can be promoted into. static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR, unsigned MaxElements, bool IsRecursive, + bool IsStoresAllowed, SmallVectorImpl &ArgPartsVec) { // Quick exit for unused arguments if (Arg->use_empty()) @@ -605,6 +556,10 @@ continue; } + // Stores are allowed for byval arguments. + if (IsStoresAllowed && isa(V)) + continue; + // Unknown user. LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " << "unknown user " << *V << "\n"); @@ -724,11 +679,17 @@ SmallVector Stores; // Scan through the uses recursively to make sure the pointer is always used - // sanely. + // sanely. Note: we don't care whether the parts of the argument are actually + // loaded or stored; if we have an improper user (a GEP with a non-constant + // index, for example), we report that the padding can be accessed even if + // the user doesn't lead to a load or store instruction. SmallVector WorkList(Arg->users()); while (!WorkList.empty()) { Value *V = WorkList.pop_back_val(); if (isa(V) || isa(V)) { + auto *GEP = dyn_cast(V); + if (GEP && !GEP->hasAllConstantIndices()) + return true; if (PtrValues.insert(V).second) append_range(WorkList, V->users()); } else if (StoreInst *Store = dyn_cast(V)) { @@ -774,7 +735,7 @@ // Don't perform argument promotion for naked functions; otherwise we can end // up removing parameters that are seemingly 'not used' as they are referred // to in the assembly. 
- if(F->hasFnAttribute(Attribute::Naked)) + if (F->hasFnAttribute(Attribute::Naked)) return nullptr; // Make sure that it is local to this module. @@ -833,7 +794,6 @@ // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. DenseMap> ArgsToPromote; - SmallPtrSet ByValArgsToTransform; for (Argument *PtrArg : PointerArgs) { // Replace sret attribute with noalias. This reduces register pressure by // avoiding a register copy. @@ -850,63 +810,34 @@ // If we can promote the pointer to its value. SmallVector ArgParts; - if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) { + // If this is a byval argument, we also allow store instructions among + // the argument's users, provided the passed value is densely packed or + // we can prove the padding bytes are never accessed. Only arguments + // with a specified alignment are handled this way; if it's + // unspecified, the actual alignment of the argument is target-specific. + Type *ByValTy = PtrArg->getParamByValType(); + bool IsStoresAllowed = + ByValTy && PtrArg->getParamAlign() && + (ArgumentPromotionPass::isDenselyPacked(ByValTy, DL) || + !canPaddingBeAccessed(PtrArg)); + + if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, IsStoresAllowed, + ArgParts)) { SmallVector Types; for (const auto &Pair : ArgParts) Types.push_back(Pair.second.Ty); if (areTypesABICompatible(Types, *F, TTI)) { ArgsToPromote.insert({PtrArg, std::move(ArgParts)}); - continue; - } - } - - // Otherwise, if this is a byval argument, and if the aggregate type is - // small, just pass the elements, which is always safe, if the passed value - // is densely packed or if we can prove the padding bytes are never - // accessed. - // - // Only handle arguments with specified alignment; if it's unspecified, the - // actual alignment of the argument is target-specific. 
- Type *ByValTy = PtrArg->getParamByValType(); - bool IsSafeToPromote = - ByValTy && PtrArg->getParamAlign() && - (ArgumentPromotionPass::isDenselyPacked(ByValTy, DL) || - !canPaddingBeAccessed(PtrArg)); - if (!IsSafeToPromote) { - LLVM_DEBUG(dbgs() << "ArgPromotion disables passing the elements of" - << " the argument '" << PtrArg->getName() - << "' because it is not safe.\n"); - continue; - } - if (StructType *STy = dyn_cast(ByValTy)) { - if (MaxElements > 0 && STy->getNumElements() > MaxElements) { - LLVM_DEBUG(dbgs() << "ArgPromotion disables passing the elements of" - << " the argument '" << PtrArg->getName() - << "' because it would require adding more" - << " than " << MaxElements - << " arguments to the function.\n"); - continue; } - SmallVector Types; - append_range(Types, STy->elements()); - - // If all the elements are single-value types, we can promote it. - bool AllSimple = - all_of(Types, [](Type *Ty) { return Ty->isSingleValueType(); }); - - // Safe to transform. Passing the elements as a scalar will allow sroa to - // hack on the new alloca we introduce. - if (AllSimple && areTypesABICompatible(Types, *F, TTI)) - ByValArgsToTransform.insert(PtrArg); } } // No promotable pointer arguments. - if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) + if (ArgsToPromote.empty()) return nullptr; - return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite); + return doPromotion(F, ArgsToPromote, ReplaceCallSite); } PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, diff --git a/llvm/test/Transforms/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/ArgumentPromotion/attrs.ll --- a/llvm/test/Transforms/ArgumentPromotion/attrs.ll +++ b/llvm/test/Transforms/ArgumentPromotion/attrs.ll @@ -3,25 +3,14 @@ %struct.ss = type { i32, i64 } -; Don't drop 'byval' on %X here. 
define internal void @f(%struct.ss* byval(%struct.ss) align 4 %b, i32* byval(i32) align 4 %X, i32 %i) nounwind { ; CHECK-LABEL: define {{[^@]+}}@f -; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval(i32) align 4 [[X:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (i32 [[B_0:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 4 -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4 -; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 -; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 -; CHECK-NEXT: [[TEMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; CHECK-NEXT: [[TEMP1:%.*]] = load i32, i32* [[TEMP]], align 4 -; CHECK-NEXT: [[TEMP2:%.*]] = add i32 [[TEMP1]], 1 -; CHECK-NEXT: store i32 [[TEMP2]], i32* [[TEMP]], align 4 -; CHECK-NEXT: store i32 0, i32* [[X]], align 4 +; CHECK-NEXT: [[TEMP:%.*]] = add i32 [[B_0]], 1 ; CHECK-NEXT: ret void ; entry: - %temp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 %temp1 = load i32, i32* %temp, align 4 %temp2 = add i32 %temp1, 1 @@ -41,11 +30,9 @@ ; CHECK-NEXT: store i32 1, i32* [[TEMP1]], align 8 ; CHECK-NEXT: [[TEMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: store i64 2, i64* [[TEMP4]], align 4 -; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0 ; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4 -; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 -; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval(i32) align 4 [[X]], i32 zeroext 0) +; CHECK-NEXT: call void @f(i32 
[[S_0_VAL]], i32 zeroext 0) ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll --- a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll +++ b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll @@ -2,24 +2,14 @@ ; RUN: opt < %s -passes=argpromotion -S | FileCheck %s ; Arg promotion eliminates the struct argument. -; FIXME: We should eliminate the i32* argument. %struct.ss = type { i32, i64 } define internal void @f(%struct.ss* byval(%struct.ss) align 8 %b, i32* byval(i32) align 4 %X) nounwind { ; CHECK-LABEL: define {{[^@]+}}@f -; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval(i32) align 4 [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (i32 [[B_0:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8 -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 8 -; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 -; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 -; CHECK-NEXT: [[TEMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; CHECK-NEXT: [[TEMP1:%.*]] = load i32, i32* [[TEMP]], align 4 -; CHECK-NEXT: [[TEMP2:%.*]] = add i32 [[TEMP1]], 1 -; CHECK-NEXT: store i32 [[TEMP2]], i32* [[TEMP]], align 4 -; CHECK-NEXT: store i32 0, i32* [[X]], align 4 +; CHECK-NEXT: [[TEMP:%.*]] = add i32 [[B_0]], 1 ; CHECK-NEXT: ret void ; entry: @@ -41,11 +31,9 @@ ; CHECK-NEXT: store i32 1, i32* [[TEMP1]], align 8 ; CHECK-NEXT: [[TEMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: store i64 2, i64* [[TEMP4]], align 4 -; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 8 -; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* 
[[S]], i32 0, i32 1 -; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 -; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval(i32) align 4 [[X]]) +; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0 +; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4 +; CHECK-NEXT: call void @f(i32 [[S_0_VAL]]) ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/ArgumentPromotion/byval.ll b/llvm/test/Transforms/ArgumentPromotion/byval.ll --- a/llvm/test/Transforms/ArgumentPromotion/byval.ll +++ b/llvm/test/Transforms/ArgumentPromotion/byval.ll @@ -7,17 +7,9 @@ define internal void @f(%struct.ss* byval(%struct.ss) align 4 %b) nounwind { ; CHECK-LABEL: define {{[^@]+}}@f -; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (i32 [[B_0:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 4 -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4 -; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 -; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 -; CHECK-NEXT: [[TEMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; CHECK-NEXT: [[TEMP1:%.*]] = load i32, i32* [[TEMP]], align 4 -; CHECK-NEXT: [[TEMP2:%.*]] = add i32 [[TEMP1]], 1 -; CHECK-NEXT: store i32 [[TEMP2]], i32* [[TEMP]], align 4 +; CHECK-NEXT: [[TEMP:%.*]] = add i32 [[B_0]], 1 ; CHECK-NEXT: ret void ; entry: @@ -28,20 +20,11 @@ ret void } - define internal void @g(%struct.ss* byval(%struct.ss) align 32 %b) nounwind { ; CHECK-LABEL: define {{[^@]+}}@g -; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (i32 [[B_0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 32 -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr 
[[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 32 -; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 -; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 -; CHECK-NEXT: [[TEMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; CHECK-NEXT: [[TEMP1:%.*]] = load i32, i32* [[TEMP]], align 4 -; CHECK-NEXT: [[TEMP2:%.*]] = add i32 [[TEMP1]], 1 -; CHECK-NEXT: store i32 [[TEMP2]], i32* [[TEMP]], align 4 +; CHECK-NEXT: [[TEMP:%.*]] = add i32 [[B_0]], 1 ; CHECK-NEXT: ret void ; entry: @@ -75,6 +58,29 @@ ret void } +; Don't transform if an argument is written to and then is loaded from, +; the Alias Analysis' 'canInstructionRangeModRef' check has to return +; 'false' in that case. +define internal void @k(%struct.ss* byval(%struct.ss) align 4 %b) nounwind { +; CHECK-LABEL: define {{[^@]+}}@k +; CHECK-SAME: (%struct.ss* byval([[STRUCT_SS:%.*]]) align 4 [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TEMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[TEMP1:%.*]] = load i32, i32* [[TEMP]], align 4 +; CHECK-NEXT: [[TEMP2:%.*]] = add i32 [[TEMP1]], 1 +; CHECK-NEXT: store i32 [[TEMP2]], i32* [[TEMP]], align 4 +; CHECK-NEXT: [[TEMP3:%.*]] = load i32, i32* [[TEMP]], align 4 +; CHECK-NEXT: ret void +; +entry: + %temp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 + %temp1 = load i32, i32* %temp, align 4 + %temp2 = add i32 %temp1, 1 + store i32 %temp2, i32* %temp, align 4 + %temp3 = load i32, i32* %temp, align 4 + ret void +} + define i32 @main() nounwind { ; CHECK-LABEL: define {{[^@]+}}@main ; CHECK-SAME: () #[[ATTR0]] { @@ -84,17 +90,14 @@ ; CHECK-NEXT: store i32 1, i32* [[TEMP1]], align 8 ; CHECK-NEXT: [[TEMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: store i64 2, i64* [[TEMP4]], align 4 -; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* 
[[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0 ; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4 -; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 -; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]]) -; CHECK-NEXT: [[S_01:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; CHECK-NEXT: [[S_01_VAL:%.*]] = load i32, i32* [[S_01]], align 32 -; CHECK-NEXT: [[S_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; CHECK-NEXT: [[S_12_VAL:%.*]] = load i64, i64* [[S_12]], align 4 -; CHECK-NEXT: call void @g(i32 [[S_01_VAL]], i64 [[S_12_VAL]]) +; CHECK-NEXT: call void @f(i32 [[S_0_VAL]]) +; CHECK-NEXT: [[S_01:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0 +; CHECK-NEXT: [[S_01_VAL:%.*]] = load i32, i32* [[S_01]], align 4 +; CHECK-NEXT: call void @g(i32 [[S_01_VAL]]) ; CHECK-NEXT: call void @h(%struct.ss* byval([[STRUCT_SS]]) [[S]]) +; CHECK-NEXT: call void @k(%struct.ss* byval([[STRUCT_SS]]) align 4 [[S]]) ; CHECK-NEXT: ret i32 0 ; entry: @@ -106,6 +109,7 @@ call void @f(%struct.ss* byval(%struct.ss) align 4 %S) nounwind call void @g(%struct.ss* byval(%struct.ss) align 32 %S) nounwind call void @h(%struct.ss* byval(%struct.ss) %S) nounwind + call void @k(%struct.ss* byval(%struct.ss) align 4 %S) nounwind ret i32 0 } diff --git a/llvm/test/Transforms/ArgumentPromotion/dbg.ll b/llvm/test/Transforms/ArgumentPromotion/dbg.ll --- a/llvm/test/Transforms/ArgumentPromotion/dbg.ll +++ b/llvm/test/Transforms/ArgumentPromotion/dbg.ll @@ -19,15 +19,8 @@ define internal void @test_byval(%struct.pair* byval(%struct.pair) align 4 %P) { ; CHECK-LABEL: define {{[^@]+}}@test_byval -; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]]) { -; CHECK-NEXT: [[P:%.*]] = alloca [[STRUCT_PAIR:%.*]], align 4 -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr 
[[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 0 -; CHECK-NEXT: store i32 [[P_0]], i32* [[DOT0]], align 4 -; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 1 -; CHECK-NEXT: store i32 [[P_1]], i32* [[DOT1]], align 4 +; CHECK-SAME: () { ; CHECK-NEXT: [[SINK:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOT2:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 0 -; CHECK-NEXT: store i32* [[DOT2]], i32** [[SINK]], align 8 ; CHECK-NEXT: ret void ; %1 = alloca i32*, align 8 @@ -42,11 +35,7 @@ ; CHECK-NEXT: [[Y_VAL:%.*]] = load i32*, i32** [[Y]], align 8, !dbg [[DBG4:![0-9]+]] ; CHECK-NEXT: [[Y_VAL_VAL:%.*]] = load i32, i32* [[Y_VAL]], align 8, !dbg [[DBG4]] ; CHECK-NEXT: call void @test(i32 [[Y_VAL_VAL]]), !dbg [[DBG4]] -; CHECK-NEXT: [[P_0:%.*]] = getelementptr [[STRUCT_PAIR:%.*]], %struct.pair* [[P]], i32 0, i32 0, !dbg [[DBG5:![0-9]+]] -; CHECK-NEXT: [[P_0_VAL:%.*]] = load i32, i32* [[P_0]], align 4, !dbg [[DBG5]] -; CHECK-NEXT: [[P_1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1, !dbg [[DBG5]] -; CHECK-NEXT: [[P_1_VAL:%.*]] = load i32, i32* [[P_1]], align 4, !dbg [[DBG5]] -; CHECK-NEXT: call void @test_byval(i32 [[P_0_VAL]], i32 [[P_1_VAL]]), !dbg [[DBG5]] +; CHECK-NEXT: call void @test_byval(), !dbg [[DBG5:![0-9]+]] ; CHECK-NEXT: ret void ; call void @test(i32** %Y), !dbg !1 diff --git a/llvm/test/Transforms/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/ArgumentPromotion/fp80.ll --- a/llvm/test/Transforms/ArgumentPromotion/fp80.ll +++ b/llvm/test/Transforms/ArgumentPromotion/fp80.ll @@ -14,23 +14,23 @@ define void @run() { ; CHECK-LABEL: define {{[^@]+}}@run() { -; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast %union.u* bitcast (%struct.s* @b to %union.u*) to i8* ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 10 ; CHECK-NEXT: [[DOTVAL:%.*]] = load i8, i8* [[TMP1]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i8 
@UseLongDoubleUnsafely(i8 [[DOTVAL]]) -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* bitcast (%struct.s* @b to %union.u*), i32 0, i32 0 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* bitcast (%struct.s* @b to %union.u*), i64 0, i32 0 ; CHECK-NEXT: [[DOT0_VAL:%.*]] = load x86_fp80, x86_fp80* [[DOT0]], align 16 ; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[DOT0_VAL]]) -; CHECK-NEXT: [[TMP4:%.*]] = bitcast %struct.Foo* @a to i64* -; CHECK-NEXT: [[A_VAL:%.*]] = load i64, i64* [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @AccessPaddingOfStruct(i64 [[A_VAL]]) -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @CaptureAStruct(%struct.Foo* byval([[STRUCT_FOO:%.*]]) @a) +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_fp80 @UseLongDoubleSafelyNoPromotion(%union.u* byval(%union.u) align 16 bitcast (%struct.s* @b to %union.u*)) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.Foo* @a to i64* +; CHECK-NEXT: [[A_VAL:%.*]] = load i64, i64* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @AccessPaddingOfStruct(i64 [[A_VAL]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @CaptureAStruct(%struct.Foo* byval([[STRUCT_FOO:%.*]]) @a) ; CHECK-NEXT: ret void ; -entry: tail call i8 @UseLongDoubleUnsafely(%union.u* byval(%union.u) align 16 bitcast (%struct.s* @b to %union.u*)) tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval(%union.u) align 16 bitcast (%struct.s* @b to %union.u*)) + tail call x86_fp80 @UseLongDoubleSafelyNoPromotion(%union.u* byval(%union.u) align 16 bitcast (%struct.s* @b to %union.u*)) call i64 @AccessPaddingOfStruct(%struct.Foo* byval(%struct.Foo) @a) call i64 @CaptureAStruct(%struct.Foo* byval(%struct.Foo) @a) ret void @@ -38,11 +38,9 @@ define internal i8 @UseLongDoubleUnsafely(%union.u* byval(%union.u) align 16 %arg) { ; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleUnsafely -; CHECK-SAME: (i8 [[ARG_10_VAL:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i8 [[ARG_10_VAL]] +; CHECK-SAME: 
(i8 [[ARG_0_VAL:%.*]]) { +; CHECK-NEXT: ret i8 [[ARG_0_VAL]] ; -entry: %bitcast = bitcast %union.u* %arg to %struct.s* %gep = getelementptr inbounds %struct.s, %struct.s* %bitcast, i64 0, i32 2 %result = load i8, i8* %gep @@ -51,23 +49,30 @@ define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval(%union.u) align 16 %arg) { ; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleSafely -; CHECK-SAME: (x86_fp80 [[ARG_0:%.*]]) { -; CHECK-NEXT: [[ARG:%.*]] = alloca [[UNION_U:%.*]], align 16 -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U]], [[UNION_U]]* [[ARG]], i32 0, i32 0 -; CHECK-NEXT: store x86_fp80 [[ARG_0]], x86_fp80* [[DOT0]], align 16 +; CHECK-SAME: (x86_fp80 [[ARG_0_VAL:%.*]]) { +; CHECK-NEXT: ret x86_fp80 [[ARG_0_VAL]] +; + %gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0 + %fp80 = load x86_fp80, x86_fp80* %gep + ret x86_fp80 %fp80 +} + +define internal x86_fp80 @UseLongDoubleSafelyNoPromotion(%union.u* byval(%union.u) align 16 %arg) { +; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleSafelyNoPromotion +; CHECK-SAME: ([[UNION_U]]* byval([[UNION_U]]) align 16 [[ARG:%.*]]) { ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], [[UNION_U]]* [[ARG]], i64 0, i32 0 -; CHECK-NEXT: [[IDX_P:%.*]] = alloca i64, align 8 -; CHECK-NEXT: store i64 0, i64* [[IDX_P]], align 8 -; CHECK-NEXT: [[IDX:%.*]] = load i64, i64* [[IDX_P]], align 8 +; CHECK-NEXT: [[TMP_IDX:%.*]] = alloca i64, align 8 +; CHECK-NEXT: store i64 0, i64* [[TMP_IDX]], align 8 +; CHECK-NEXT: [[IDX:%.*]] = load i64, i64* [[TMP_IDX]], align 8 ; CHECK-NEXT: [[GEP_IDX:%.*]] = getelementptr inbounds [[UNION_U]], [[UNION_U]]* [[ARG]], i64 [[IDX]], i32 0 -; CHECK-NEXT: [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]], align 16 +; CHECK-NEXT: [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]] ; CHECK-NEXT: ret x86_fp80 [[FP80]] ; %gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0 %idx_slot = alloca i64, align 8 store i64 0, i64* %idx_slot, align 8 %idx = load i64, i64* 
%idx_slot, align 8 - %gep_idx = getelementptr inbounds %union.u, %union.u* %arg, i64 %idx, i32 0 ; to protect from "usual" promotion + %gep_idx = getelementptr inbounds %union.u, %union.u* %arg, i64 %idx, i32 0 ; to protect from promotion %fp80 = load x86_fp80, x86_fp80* %gep ret x86_fp80 %fp80 }