[ArgPromotion] Account for the parameter's alignment when promoting loads

A promoted load keeps the larger of the callee load's alignment and the
alignment the pointer parameter guarantees at that byte offset. The parameter's
`align` attribute describes the base pointer only, so it is first reduced with
commonAlignment(base, Off) before it is applied to a part at a non-zero offset.

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -517,8 +517,12 @@
       return false;
 
     int64_t Off = Offset.getSExtValue();
+    // The parameter's alignment describes the base pointer only; the part at
+    // byte offset Off inherits no more than commonAlignment(base, Off).
+    Align ArgAlign = commonAlignment(Arg->getParamAlign().valueOrOne(), Off);
+    Align PartAlign = std::max(LI->getAlign(), ArgAlign);
     auto Pair = ArgParts.try_emplace(
-        Off, ArgPart{Ty, LI->getAlign(), GuaranteedToExecute ? LI : nullptr});
+        Off, ArgPart{Ty, PartAlign, GuaranteedToExecute ? LI : nullptr});
     ArgPart &Part = Pair.first->second;
     bool OffsetNotSeenBefore = Pair.second;
 
@@ -545,20 +549,20 @@
     // because we only allow a single type for a given offset, which also means
     // that the number of accessed bytes will be the same.
     if (!GuaranteedToExecute &&
-        (OffsetNotSeenBefore || Part.Alignment < LI->getAlign())) {
+        (OffsetNotSeenBefore || Part.Alignment < PartAlign)) {
       // We won't be able to prove dereferenceability for negative offsets.
       if (Off < 0)
         return false;
 
       // If the offset is not aligned, an aligned base pointer won't help.
-      if (!isAligned(LI->getAlign(), Off))
+      if (!isAligned(PartAlign, Off))
         return false;
 
       NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
-      NeededAlign = std::max(NeededAlign, LI->getAlign());
+      NeededAlign = std::max(NeededAlign, PartAlign);
     }
 
-    Part.Alignment = std::max(Part.Alignment, LI->getAlign());
+    Part.Alignment = std::max(Part.Alignment, PartAlign);
     return true;
   };
 
diff --git a/llvm/test/Transforms/ArgumentPromotion/align.ll b/llvm/test/Transforms/ArgumentPromotion/align.ll
--- a/llvm/test/Transforms/ArgumentPromotion/align.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/align.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
 ; RUN: opt -S -argpromotion < %s | FileCheck %s
 
+%struct.ss = type { i32, i64 }
+
 define internal i32 @callee_must_exec(i32* %p) {
 ; CHECK-LABEL: define {{[^@]+}}@callee_must_exec
 ; CHECK-SAME: (i32 [[P_0_VAL:%.*]]) {
@@ -112,3 +114,117 @@
   call i32 @callee_not_guaranteed_aligned(i1 %c, i32* %p)
   ret void
 }
+
+define internal void @callee_load_from_aligned_1(%struct.ss* align 16 %b) {
+; CHECK-LABEL: define {{[^@]+}}@callee_load_from_aligned_1
+; CHECK-SAME: (i32 [[B_0:%.*]]) {
+; CHECK-NEXT:    [[TEMP2:%.*]] = add i32 [[B_0]], 1
+; CHECK-NEXT:    ret void
+;
+  %temp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
+  %temp1 = load i32, i32* %temp, align 4
+  %temp2 = add i32 %temp1, 1
+  %temp3 = load i32, i32* %temp, align 8
+  ret void
+}
+
+define internal void @callee_load_from_aligned_2(%struct.ss* align 16 %b) {
+; CHECK-LABEL: define {{[^@]+}}@callee_load_from_aligned_2
+; CHECK-SAME: (i32 [[B_0:%.*]]) {
+; CHECK-NEXT:    [[TEMP2:%.*]] = add i32 [[B_0]], 1
+; CHECK-NEXT:    ret void
+;
+  %temp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
+  %temp1 = load i32, i32* %temp, align 4
+  %temp2 = add i32 %temp1, 1
+  %temp3 = load i32, i32* %temp, align 32
+  ret void
+}
+
+define internal void @callee_load_from_aligned_3(%struct.ss* %b) {
+; CHECK-LABEL: define {{[^@]+}}@callee_load_from_aligned_3
+; CHECK-SAME: (i32 [[B_0:%.*]]) {
+; CHECK-NEXT:    [[TEMP2:%.*]] = add i32 [[B_0]], 1
+; CHECK-NEXT:    ret void
+;
+  %temp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
+  %temp1 = load i32, i32* %temp, align 4
+  %temp2 = add i32 %temp1, 1
+  %temp3 = load i32, i32* %temp, align 8
+  ret void
+}
+
+define i32 @caller_load_from_aligned() {
+; CHECK-LABEL: define {{[^@]+}}@caller_load_from_aligned
+; CHECK-SAME: () {
+; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 16
+; CHECK-NEXT:    [[TEMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    store i32 1, i32* [[TEMP1]], align 16
+; CHECK-NEXT:    [[TEMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    store i64 2, i64* [[TEMP4]], align 4
+; CHECK-NEXT:    [[S_1_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0
+; CHECK-NEXT:    [[S_1_0_VAL:%.*]] = load i32, i32* [[S_1_0]], align 16
+; CHECK-NEXT:    call void @callee_load_from_aligned_1(i32 [[S_1_0_VAL]])
+; CHECK-NEXT:    [[S_2_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0
+; CHECK-NEXT:    [[S_2_0_VAL:%.*]] = load i32, i32* [[S_2_0]], align 32
+; CHECK-NEXT:    call void @callee_load_from_aligned_2(i32 [[S_2_0_VAL]])
+; CHECK-NEXT:    [[S_3_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0
+; CHECK-NEXT:    [[S_3_0_VAL:%.*]] = load i32, i32* [[S_3_0]], align 8
+; CHECK-NEXT:    call void @callee_load_from_aligned_3(i32 [[S_3_0_VAL]])
+; CHECK-NEXT:    ret i32 0
+;
+  %S = alloca %struct.ss, align 16
+  %temp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
+  store i32 1, i32* %temp1, align 16
+  %temp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
+  store i64 2, i64* %temp4, align 4
+  call void @callee_load_from_aligned_1(%struct.ss* %S)
+  call void @callee_load_from_aligned_2(%struct.ss* %S)
+  call void @callee_load_from_aligned_3(%struct.ss* %S)
+  ret i32 0
+}
+
+define internal void @callee_load_first_element(%struct.ss* byval(%struct.ss) align 16 %b) {
+; CHECK-LABEL: define {{[^@]+}}@callee_load_first_element
+; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) {
+; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 16
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]], align 16
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]], align 4
+; CHECK-NEXT:    [[TEMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    [[TEMP1:%.*]] = load i32, i32* [[TEMP]], align 8
+; CHECK-NEXT:    [[TEMP2:%.*]] = add i32 [[TEMP1]], 1
+; CHECK-NEXT:    [[TEMP3:%.*]] = load i32, i32* [[TEMP]], align 32
+; CHECK-NEXT:    ret void
+;
+  %temp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
+  %temp1 = load i32, i32* %temp, align 8
+  %temp2 = add i32 %temp1, 1
+  %temp3 = load i32, i32* %temp, align 32
+  ret void
+}
+
+define i32 @caller_load_first_element() {
+; CHECK-LABEL: define {{[^@]+}}@caller_load_first_element
+; CHECK-SAME: () {
+; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 16
+; CHECK-NEXT:    [[TEMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    store i32 1, i32* [[TEMP1]], align 16
+; CHECK-NEXT:    [[TEMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    store i64 2, i64* [[TEMP4]], align 4
+; CHECK-NEXT:    [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT:    [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 16
+; CHECK-NEXT:    [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
+; CHECK-NEXT:    call void @callee_load_first_element(i32 [[S_0_VAL]], i64 [[S_1_VAL]])
+; CHECK-NEXT:    ret i32 0
+;
+  %S = alloca %struct.ss, align 16
+  %temp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
+  store i32 1, i32* %temp1, align 16
+  %temp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
+  store i64 2, i64* %temp4, align 4
+  call void @callee_load_first_element(%struct.ss* byval(%struct.ss) align 16 %S)
+  ret i32 0
+}