Index: clang/lib/CodeGen/CGCall.cpp =================================================================== --- clang/lib/CodeGen/CGCall.cpp +++ clang/lib/CodeGen/CGCall.cpp @@ -2071,6 +2071,10 @@ hasUsedSRet = true; if (RetAI.getInReg()) SRETAttrs.addAttribute(llvm::Attribute::InReg); + unsigned Align = RetAI.getIndirectAlign().getQuantity(); + if (Align < + getDataLayout().getABITypeAlignment(getTypes().ConvertType(RetTy))) + SRETAttrs.addAlignmentAttr(Align); ArgAttrs[IRFunctionArgs.getSRetArgNo()] = llvm::AttributeSet::get(getLLVMContext(), SRETAttrs); } Index: clang/test/CodeGen/aligned-sret.c =================================================================== --- /dev/null +++ clang/test/CodeGen/aligned-sret.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple x86_64-apple-macos %s -S -emit-llvm -o- | FileCheck %s + +typedef __attribute__((__ext_vector_type__(4),__aligned__(16))) double simd_double4; +typedef struct { simd_double4 columns[4]; } simd_double4x4; +typedef simd_double4x4 matrix_double4x4; + +// CHECK: define void @ident(%struct.simd_double4x4* noalias sret align 16 %agg.result +matrix_double4x4 ident(matrix_double4x4 x) { + return x; +} Index: clang/test/CodeGen/arm-varargs.c =================================================================== --- clang/test/CodeGen/arm-varargs.c +++ clang/test/CodeGen/arm-varargs.c @@ -201,7 +201,7 @@ long long val; } underaligned_long_long_struct; underaligned_long_long_struct underaligned_long_long_struct_test() { -// CHECK-LABEL: define void @underaligned_long_long_struct_test(%struct.underaligned_long_long_struct* noalias sret %agg.result) +// CHECK-LABEL: define void @underaligned_long_long_struct_test(%struct.underaligned_long_long_struct* noalias sret align 2 %agg.result) return va_arg(the_list, underaligned_long_long_struct); // CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4 // CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR]], i32 8 @@ -279,7 +279,7 @@ long long val __attribute__((packed,aligned(2))); } underaligned_long_long_struct_member; underaligned_long_long_struct_member underaligned_long_long_struct_member_test() { -// CHECK-LABEL: define void @underaligned_long_long_struct_member_test(%struct.underaligned_long_long_struct_member* noalias sret %agg.result) +// CHECK-LABEL: define void @underaligned_long_long_struct_member_test(%struct.underaligned_long_long_struct_member* noalias sret align 2 %agg.result) return va_arg(the_list, underaligned_long_long_struct_member); // CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4 // CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR]], i32 8 Index: clang/test/CodeGen/arm64-arguments.c =================================================================== --- clang/test/CodeGen/arm64-arguments.c +++ clang/test/CodeGen/arm64-arguments.c @@ -181,9 +181,9 @@ // CHECK: define <4 x float> @f1_1(<4 x float> %{{.*}}) T_float32x4 f1_1(T_float32x4 a0) { return a0; } // Vector with length bigger than 16-byte is illegal and is passed indirectly. -// CHECK: define void @f1_2(<8 x float>* noalias sret %{{.*}}, <8 x float>* %0) +// CHECK: define void @f1_2(<8 x float>* noalias sret align 16 %{{.*}}, <8 x float>* %0) T_float32x8 f1_2(T_float32x8 a0) { return a0; } -// CHECK: define void @f1_3(<16 x float>* noalias sret %{{.*}}, <16 x float>* %0) +// CHECK: define void @f1_3(<16 x float>* noalias sret align 16 %{{.*}}, <16 x float>* %0) T_float32x16 f1_3(T_float32x16 a0) { return a0; } // Testing alignment with aggregates: HFA, aggregates with size <= 16 bytes and Index: clang/test/CodeGen/arm64_32.c =================================================================== --- clang/test/CodeGen/arm64_32.c +++ clang/test/CodeGen/arm64_32.c @@ -27,4 +27,4 @@ typedef float __attribute__((ext_vector_type(16))) v16f32; v16f32 func(v16f32 in) { return in; } -// CHECK: define void @func(<16 x float>* noalias sret {{%.*}}, <16 x float> {{%.*}}) +// CHECK: define void @func(<16 x float>* noalias sret align 16 {{%.*}}, <16 x float> {{%.*}}) Index: clang/test/CodeGen/systemz-abi-vector.c =================================================================== --- clang/test/CodeGen/systemz-abi-vector.c +++ clang/test/CodeGen/systemz-abi-vector.c @@ -71,7 +71,7 @@ v32i8 pass_v32i8(v32i8 arg) { return arg; } // CHECK-LABEL: define void @pass_v32i8(<32 x i8>* noalias sret %{{.*}}, <32 x i8>* %0) -// CHECK-VECTOR-LABEL: define void @pass_v32i8(<32 x i8>* noalias sret %{{.*}}, <32 x i8>* %0) +// CHECK-VECTOR-LABEL: define void @pass_v32i8(<32 x i8>* noalias sret align 8 %{{.*}}, <32 x i8>* %0) v1i16 pass_v1i16(v1i16 arg) { return arg; } // CHECK-LABEL: define void @pass_v1i16(<1 x i16>* noalias sret %{{.*}}, <1 x i16>* %0) @@ -168,7 +168,7 @@ struct agg_v32i8 { v32i8 a; }; struct agg_v32i8 pass_agg_v32i8(struct agg_v32i8 arg) { return arg; } // CHECK-LABEL: define void @pass_agg_v32i8(%struct.agg_v32i8* noalias sret %{{.*}}, %struct.agg_v32i8* %{{.*}}) -// CHECK-VECTOR-LABEL: define void @pass_agg_v32i8(%struct.agg_v32i8* noalias sret %{{.*}}, %struct.agg_v32i8* %{{.*}}) +// CHECK-VECTOR-LABEL: define void @pass_agg_v32i8(%struct.agg_v32i8* noalias sret align 8 %{{.*}}, %struct.agg_v32i8* %{{.*}}) // Verify that the following are *not* vector-like aggregate types @@ -379,7 +379,7 @@ // CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi <32 x i8>** [ [[REG_ADDR]], %{{.*}} ], [ [[MEM_ADDR]], %{{.*}} ] // CHECK: [[INDIRECT_ARG:%[^ ]+]] = load <32 x i8>*, <32 x i8>** [[VA_ARG_ADDR]] // CHECK: ret void -// CHECK-VECTOR-LABEL: define void @va_v32i8(<32 x i8>* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}}) +// CHECK-VECTOR-LABEL: define void @va_v32i8(<32 x i8>* noalias sret align 8 %{{.*}}, %struct.__va_list_tag* %{{.*}}) // CHECK-VECTOR: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0 // CHECK-VECTOR: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]] // CHECK-VECTOR: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5 @@ -576,7 +576,7 @@ // CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi %struct.agg_v32i8** [ [[REG_ADDR]], %{{.*}} ], [ [[MEM_ADDR]], %{{.*}} ] // CHECK: [[INDIRECT_ARG:%[^ ]+]] = load %struct.agg_v32i8*, %struct.agg_v32i8** [[VA_ARG_ADDR]] // CHECK: ret void -// CHECK-VECTOR-LABEL: define void @va_agg_v32i8(%struct.agg_v32i8* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}}) +// CHECK-VECTOR-LABEL: define void @va_agg_v32i8(%struct.agg_v32i8* noalias sret align 8 %{{.*}}, %struct.__va_list_tag* %{{.*}}) // CHECK-VECTOR: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0 // CHECK-VECTOR: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]] // CHECK-VECTOR: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5