Index: lib/CodeGen/CGBuilder.h =================================================================== --- lib/CodeGen/CGBuilder.h +++ lib/CodeGen/CGBuilder.h @@ -258,23 +258,23 @@ using CGBuilderBaseTy::CreateMemCpy; llvm::CallInst *CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile = false) { - auto Align = std::min(Dest.getAlignment(), Src.getAlignment()); - return CreateMemCpy(Dest.getPointer(), Src.getPointer(), Size, - Align.getQuantity(), IsVolatile); + return CreateMemCpy(Dest.getPointer(), Dest.getAlignment().getQuantity(), + Src.getPointer(), Src.getAlignment().getQuantity(), + Size,IsVolatile); } llvm::CallInst *CreateMemCpy(Address Dest, Address Src, uint64_t Size, bool IsVolatile = false) { - auto Align = std::min(Dest.getAlignment(), Src.getAlignment()); - return CreateMemCpy(Dest.getPointer(), Src.getPointer(), Size, - Align.getQuantity(), IsVolatile); + return CreateMemCpy(Dest.getPointer(), Dest.getAlignment().getQuantity(), + Src.getPointer(), Src.getAlignment().getQuantity(), + Size, IsVolatile); } using CGBuilderBaseTy::CreateMemMove; llvm::CallInst *CreateMemMove(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile = false) { - auto Align = std::min(Dest.getAlignment(), Src.getAlignment()); - return CreateMemMove(Dest.getPointer(), Src.getPointer(), Size, - Align.getQuantity(), IsVolatile); + return CreateMemMove(Dest.getPointer(), Dest.getAlignment().getQuantity(), + Src.getPointer(), Src.getAlignment().getQuantity(), + Size, IsVolatile); } using CGBuilderBaseTy::CreateMemSet; Index: test/CodeGen/2007-11-07-CopyAggregateAlign.c =================================================================== --- test/CodeGen/2007-11-07-CopyAggregateAlign.c +++ test/CodeGen/2007-11-07-CopyAggregateAlign.c @@ -2,6 +2,6 @@ struct A { char s, t, u, v; short a; }; // CHECK: %a = alloca %struct.A, align 2 // CHECK: %b = alloca %struct.A, align 2 -// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{.*}}, i32 2, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{.*}} align 2 {{.*}} align 2 {{.*}}, i1 false) void q() { struct A a, b; a = b; } Index: test/CodeGen/2007-11-07-ZeroAggregateAlign.c =================================================================== --- test/CodeGen/2007-11-07-ZeroAggregateAlign.c +++ test/CodeGen/2007-11-07-ZeroAggregateAlign.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s struct A { short s; short t; int i; }; // CHECK: %a = alloca %struct.A, align 4 -// CHECK: call void @llvm.memset.p0i8.{{.*}}i32 4, i1 false) +// CHECK: call void @llvm.memset.p0i8.{{.*}} align 4 {{.*}}, i1 false) void q() { struct A a = {0}; } Index: test/CodeGen/64bit-swiftcall.c =================================================================== --- test/CodeGen/64bit-swiftcall.c +++ test/CodeGen/64bit-swiftcall.c @@ -216,10 +216,10 @@ // CHECK: [[RET:%.*]] = alloca [[STRUCT:%.*]], align 1 // CHECK: [[RES:%.*]] = alloca [[STRUCT]], align 1 // CHECK: [[CAST:%.*]] = bitcast [[STRUCT]]* [[RES]] to i8* -// CHECK: call void @llvm.memset{{.*}}(i8* [[CAST]], i8 0, i64 5 +// CHECK: call void @llvm.memset{{.*}}(i8* align 1 [[CAST]], i8 0, i64 5 // CHECK: [[CASTRET:%.*]] = bitcast [[STRUCT]]* [[RET]] to i8* // CHECK: [[CASTRES:%.*]] = bitcast [[STRUCT]]* [[RES]] to i8* -// CHECK: call void @llvm.memcpy{{.*}}(i8* [[CASTRET]], i8* [[CASTRES]], i64 5 +// CHECK: call void @llvm.memcpy{{.*}}(i8* align 1 [[CASTRET]], i8* align 1 [[CASTRES]], i64 5 // CHECK: [[CAST:%.*]] = bitcast [[STRUCT]]* [[RET]] to { i64 }* // CHECK: [[GEP:%.*]] = getelementptr inbounds { i64 }, { i64 }* [[CAST]], i32 0, i32 0 // CHECK: [[R0:%.*]] = load i64, i64* [[GEP]], align 1 @@ -269,10 +269,10 @@ // CHECK: [[RET:%.*]] = alloca [[UNION:%.*]], align 8 // CHECK: [[RES:%.*]] = alloca [[UNION]], align 8 // CHECK: [[CAST:%.*]] = bitcast [[UNION]]* [[RES]] to i8* -// CHECK: call void @llvm.memcpy{{.*}}(i8* [[CAST]] +// CHECK: call void @llvm.memcpy{{.*}}(i8* align 8 [[CAST]] // CHECK: [[CASTRET:%.*]] = bitcast [[UNION]]* [[RET]] to i8* // CHECK: [[CASTRES:%.*]] = bitcast [[UNION]]* [[RES]] to i8* -// CHECK: call void @llvm.memcpy{{.*}}(i8* [[CASTRET]], i8* [[CASTRES]] +// CHECK: call void @llvm.memcpy{{.*}}(i8* align 8 [[CASTRET]], i8* align 8 [[CASTRES]] // CHECK: [[CAST:%.*]] = bitcast [[UNION]]* [[RET]] to { i64 }* // CHECK: [[GEP:%.*]] = getelementptr inbounds { i64 }, { i64 }* [[CAST]], i32 0, i32 0 // CHECK: [[R0:%.*]] = load i64, i64* [[GEP]], align 8 Index: test/CodeGen/aarch64-neon-intrinsics.c =================================================================== --- test/CodeGen/aarch64-neon-intrinsics.c +++ test/CodeGen/aarch64-neon-intrinsics.c @@ -9203,7 +9203,7 @@ // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint8x16x2_t [[TMP5]] uint8x16x2_t test_vld2q_u8(uint8_t const *a) { @@ -9221,7 +9221,7 @@ // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint16x8x2_t [[TMP6]] uint16x8x2_t test_vld2q_u16(uint16_t const *a) { @@ -9239,7 +9239,7 @@ // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint32x4x2_t [[TMP6]] uint32x4x2_t test_vld2q_u32(uint32_t const *a) { @@ -9257,7 +9257,7 @@ // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x2_t [[TMP6]] uint64x2x2_t test_vld2q_u64(uint64_t const *a) { @@ -9274,7 +9274,7 @@ // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int8x16x2_t [[TMP5]] int8x16x2_t test_vld2q_s8(int8_t const *a) { @@ -9292,7 +9292,7 @@ // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int16x8x2_t [[TMP6]] int16x8x2_t test_vld2q_s16(int16_t const *a) { @@ -9310,7 +9310,7 @@ // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int32x4x2_t [[TMP6]] int32x4x2_t test_vld2q_s32(int32_t const *a) { @@ -9328,7 +9328,7 @@ // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x2_t [[TMP6]] int64x2x2_t test_vld2q_s64(int64_t const *a) { @@ -9346,7 +9346,7 @@ // CHECK: store { <8 x half>, <8 x half> } [[VLD2]], { <8 x half>, <8 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.float16x8x2_t [[TMP6]] float16x8x2_t test_vld2q_f16(float16_t const *a) { @@ -9364,7 +9364,7 @@ // CHECK: store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.float32x4x2_t [[TMP6]] float32x4x2_t test_vld2q_f32(float32_t const *a) { @@ -9382,7 +9382,7 @@ // CHECK: store { <2 x double>, <2 x double> } [[VLD2]], { <2 x double>, <2 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x2_t [[TMP6]] float64x2x2_t test_vld2q_f64(float64_t const *a) { @@ -9399,7 +9399,7 @@ // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly8x16x2_t [[TMP5]] poly8x16x2_t test_vld2q_p8(poly8_t const *a) { @@ -9417,7 +9417,7 @@ // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly16x8x2_t [[TMP6]] poly16x8x2_t test_vld2q_p16(poly16_t const *a) { @@ -9434,7 +9434,7 @@ // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint8x8x2_t [[TMP5]] uint8x8x2_t test_vld2_u8(uint8_t const *a) { @@ -9452,7 +9452,7 @@ // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint16x4x2_t [[TMP6]] uint16x4x2_t test_vld2_u16(uint16_t const *a) { @@ -9470,7 +9470,7 @@ // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint32x2x2_t [[TMP6]] uint32x2x2_t test_vld2_u32(uint32_t const *a) { @@ -9488,7 +9488,7 @@ // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint64x1x2_t [[TMP6]] uint64x1x2_t test_vld2_u64(uint64_t const *a) { @@ -9505,7 +9505,7 @@ // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int8x8x2_t [[TMP5]] int8x8x2_t test_vld2_s8(int8_t const *a) { @@ -9523,7 +9523,7 @@ // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int16x4x2_t [[TMP6]] int16x4x2_t test_vld2_s16(int16_t const *a) { @@ -9541,7 +9541,7 @@ // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int32x2x2_t [[TMP6]] int32x2x2_t test_vld2_s32(int32_t const *a) { @@ -9559,7 +9559,7 @@ // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int64x1x2_t [[TMP6]] int64x1x2_t test_vld2_s64(int64_t const *a) { @@ -9577,7 +9577,7 @@ // CHECK: store { <4 x half>, <4 x half> } [[VLD2]], { <4 x half>, <4 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.float16x4x2_t [[TMP6]] float16x4x2_t test_vld2_f16(float16_t const *a) { @@ -9595,7 +9595,7 @@ // CHECK: store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.float32x2x2_t [[TMP6]] float32x2x2_t test_vld2_f32(float32_t const *a) { @@ -9613,7 +9613,7 @@ // CHECK: store { <1 x double>, <1 x double> } [[VLD2]], { <1 x double>, <1 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x2_t [[TMP6]] float64x1x2_t test_vld2_f64(float64_t const *a) { @@ -9630,7 +9630,7 @@ // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly8x8x2_t [[TMP5]] poly8x8x2_t test_vld2_p8(poly8_t const *a) { @@ -9648,7 +9648,7 @@ // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly16x4x2_t [[TMP6]] poly16x4x2_t test_vld2_p16(poly16_t const *a) { @@ -9665,7 +9665,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint8x16x3_t [[TMP5]] uint8x16x3_t test_vld3q_u8(uint8_t const *a) { @@ -9683,7 +9683,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint16x8x3_t [[TMP6]] uint16x8x3_t test_vld3q_u16(uint16_t const *a) { @@ -9701,7 +9701,7 @@ // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint32x4x3_t [[TMP6]] uint32x4x3_t test_vld3q_u32(uint32_t const *a) { @@ -9719,7 +9719,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x3_t [[TMP6]] uint64x2x3_t test_vld3q_u64(uint64_t const *a) { @@ -9736,7 +9736,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int8x16x3_t [[TMP5]] int8x16x3_t test_vld3q_s8(int8_t const *a) { @@ -9754,7 +9754,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int16x8x3_t [[TMP6]] int16x8x3_t test_vld3q_s16(int16_t const *a) { @@ -9772,7 +9772,7 @@ // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int32x4x3_t [[TMP6]] int32x4x3_t test_vld3q_s32(int32_t const *a) { @@ -9790,7 +9790,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x3_t [[TMP6]] int64x2x3_t test_vld3q_s64(int64_t const *a) { @@ -9808,7 +9808,7 @@ // CHECK: store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.float16x8x3_t [[TMP6]] float16x8x3_t test_vld3q_f16(float16_t const *a) { @@ -9826,7 +9826,7 @@ // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.float32x4x3_t [[TMP6]] float32x4x3_t test_vld3q_f32(float32_t const *a) { @@ -9844,7 +9844,7 @@ // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x3_t [[TMP6]] float64x2x3_t test_vld3q_f64(float64_t const *a) { @@ -9861,7 +9861,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 48, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly8x16x3_t [[TMP5]] poly8x16x3_t test_vld3q_p8(poly8_t const *a) { @@ -9879,7 +9879,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly16x8x3_t [[TMP6]] poly16x8x3_t test_vld3q_p16(poly16_t const *a) { @@ -9896,7 +9896,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint8x8x3_t [[TMP5]] uint8x8x3_t test_vld3_u8(uint8_t const *a) { @@ -9914,7 +9914,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint16x4x3_t [[TMP6]] uint16x4x3_t test_vld3_u16(uint16_t const *a) { @@ -9932,7 +9932,7 @@ // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint32x2x3_t [[TMP6]] uint32x2x3_t test_vld3_u32(uint32_t const *a) { @@ -9950,7 +9950,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint64x1x3_t [[TMP6]] uint64x1x3_t test_vld3_u64(uint64_t const *a) { @@ -9967,7 +9967,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int8x8x3_t [[TMP5]] int8x8x3_t test_vld3_s8(int8_t const *a) { @@ -9985,7 +9985,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int16x4x3_t [[TMP6]] int16x4x3_t test_vld3_s16(int16_t const *a) { @@ -10003,7 +10003,7 @@ // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int32x2x3_t [[TMP6]] int32x2x3_t test_vld3_s32(int32_t const *a) { @@ -10021,7 +10021,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int64x1x3_t [[TMP6]] int64x1x3_t test_vld3_s64(int64_t const *a) { @@ -10039,7 +10039,7 @@ // CHECK: store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.float16x4x3_t [[TMP6]] float16x4x3_t test_vld3_f16(float16_t const *a) { @@ -10057,7 +10057,7 @@ // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.float32x2x3_t [[TMP6]] float32x2x3_t test_vld3_f32(float32_t const *a) { @@ -10075,7 +10075,7 @@ // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x3_t [[TMP6]] float64x1x3_t test_vld3_f64(float64_t const *a) { @@ -10092,7 +10092,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 24, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly8x8x3_t [[TMP5]] poly8x8x3_t test_vld3_p8(poly8_t const *a) { @@ -10110,7 +10110,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly16x4x3_t [[TMP6]] poly16x4x3_t test_vld3_p16(poly16_t const *a) { @@ -10127,7 +10127,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint8x16x4_t [[TMP5]] uint8x16x4_t test_vld4q_u8(uint8_t const *a) { @@ -10145,7 +10145,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint16x8x4_t [[TMP6]] uint16x8x4_t test_vld4q_u16(uint16_t const *a) { @@ -10163,7 +10163,7 @@ // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint32x4x4_t [[TMP6]] uint32x4x4_t test_vld4q_u32(uint32_t const *a) { @@ -10181,7 +10181,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x4_t [[TMP6]] uint64x2x4_t test_vld4q_u64(uint64_t const *a) { @@ -10198,7 +10198,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int8x16x4_t [[TMP5]] int8x16x4_t test_vld4q_s8(int8_t const *a) { @@ -10216,7 +10216,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int16x8x4_t [[TMP6]] int16x8x4_t test_vld4q_s16(int16_t const *a) { @@ -10234,7 +10234,7 @@ // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int32x4x4_t [[TMP6]] int32x4x4_t test_vld4q_s32(int32_t const *a) { @@ -10252,7 +10252,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x4_t [[TMP6]] int64x2x4_t test_vld4q_s64(int64_t const *a) { @@ -10270,7 +10270,7 @@ // CHECK: store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.float16x8x4_t [[TMP6]] float16x8x4_t test_vld4q_f16(float16_t const *a) { @@ -10288,7 +10288,7 @@ // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.float32x4x4_t [[TMP6]] float32x4x4_t test_vld4q_f32(float32_t const *a) { @@ -10306,7 +10306,7 @@ // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x4_t [[TMP6]] float64x2x4_t test_vld4q_f64(float64_t const *a) { @@ -10323,7 +10323,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 64, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly8x16x4_t [[TMP5]] poly8x16x4_t test_vld4q_p8(poly8_t const *a) { @@ -10341,7 +10341,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly16x8x4_t [[TMP6]] poly16x8x4_t test_vld4q_p16(poly16_t const *a) { @@ -10358,7 +10358,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint8x8x4_t [[TMP5]] uint8x8x4_t test_vld4_u8(uint8_t const *a) { @@ -10376,7 +10376,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint16x4x4_t [[TMP6]] uint16x4x4_t test_vld4_u16(uint16_t const *a) { @@ -10394,7 +10394,7 @@ // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint32x2x4_t [[TMP6]] uint32x2x4_t test_vld4_u32(uint32_t const *a) { @@ -10412,7 +10412,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint64x1x4_t [[TMP6]] uint64x1x4_t test_vld4_u64(uint64_t const *a) { @@ -10429,7 +10429,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int8x8x4_t [[TMP5]] int8x8x4_t test_vld4_s8(int8_t const *a) { @@ -10447,7 +10447,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int16x4x4_t [[TMP6]] int16x4x4_t test_vld4_s16(int16_t const *a) { @@ -10465,7 +10465,7 @@ // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int32x2x4_t [[TMP6]] int32x2x4_t test_vld4_s32(int32_t const *a) { @@ -10483,7 +10483,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int64x1x4_t [[TMP6]] int64x1x4_t test_vld4_s64(int64_t const *a) { @@ -10501,7 +10501,7 @@ // CHECK: store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.float16x4x4_t [[TMP6]] float16x4x4_t test_vld4_f16(float16_t const *a) { @@ -10519,7 +10519,7 @@ // CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.float32x2x4_t [[TMP6]] float32x2x4_t test_vld4_f32(float32_t const *a) { @@ -10537,7 +10537,7 @@ // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x4_t [[TMP6]] float64x1x4_t test_vld4_f64(float64_t const *a) { @@ -10554,7 +10554,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 32, i1 false) // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly8x8x4_t [[TMP5]] poly8x8x4_t test_vld4_p8(poly8_t const *a) { @@ -10572,7 +10572,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly16x4x4_t [[TMP6]] poly16x4x4_t test_vld4_p16(poly16_t const *a) { @@ -10854,7 +10854,7 @@ // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -10874,7 +10874,7 @@ // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -10899,7 +10899,7 @@ // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -10924,7 +10924,7 @@ // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -10949,7 +10949,7 @@ // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -10969,7 +10969,7 @@ // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -10994,7 +10994,7 @@ // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -11019,7 +11019,7 @@ // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -11044,7 +11044,7 @@ // CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0 @@ -11069,7 +11069,7 @@ // CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0 @@ -11094,7 +11094,7 @@ // CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0 @@ -11119,7 +11119,7 @@ // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -11139,7 +11139,7 @@ // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -11164,7 +11164,7 @@ // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -11184,7 +11184,7 @@ // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -11209,7 +11209,7 @@ // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -11234,7 +11234,7 @@ // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -11259,7 +11259,7 @@ // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -11279,7 +11279,7 @@ // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -11304,7 +11304,7 @@ // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -11329,7 +11329,7 @@ // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -11354,7 +11354,7 @@ // CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0 @@ -11379,7 +11379,7 @@ // CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0 @@ -11404,7 +11404,7 @@ // CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0 @@ -11429,7 +11429,7 @@ // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -11449,7 +11449,7 @@ // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -11474,7 +11474,7 @@ // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -11497,7 +11497,7 @@ // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -11527,7 +11527,7 @@ // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -11557,7 +11557,7 @@ // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -11587,7 +11587,7 @@ // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -11610,7 +11610,7 @@ // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -11640,7 +11640,7 @@ // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -11670,7 +11670,7 @@ // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -11700,7 +11700,7 @@ // CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0 @@ -11730,7 +11730,7 @@ // CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0 @@ -11760,7 +11760,7 @@ // CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0 @@ -11790,7 +11790,7 @@ // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -11813,7 +11813,7 @@ // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -11843,7 +11843,7 @@ // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -11866,7 +11866,7 @@ // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -11896,7 +11896,7 @@ // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -11926,7 +11926,7 @@ // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -11956,7 +11956,7 @@ // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -11979,7 +11979,7 @@ // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -12009,7 +12009,7 @@ // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -12039,7 +12039,7 @@ // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -12069,7 +12069,7 @@ // CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0 @@ -12099,7 +12099,7 @@ // CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0 @@ -12129,7 +12129,7 @@ // CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0 @@ -12159,7 +12159,7 @@ // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -12182,7 +12182,7 @@ // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -12212,7 +12212,7 @@ // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -12238,7 +12238,7 @@ // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -12273,7 +12273,7 @@ // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -12308,7 +12308,7 @@ // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -12343,7 +12343,7 @@ // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -12369,7 +12369,7 @@ // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -12404,7 +12404,7 @@ // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -12439,7 +12439,7 @@ // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -12474,7 +12474,7 @@ // CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0 @@ -12509,7 +12509,7 @@ // CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0 @@ -12544,7 +12544,7 @@ // CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0 @@ -12579,7 +12579,7 @@ // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -12605,7 +12605,7 @@ // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -12640,7 +12640,7 @@ // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -12666,7 +12666,7 @@ // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -12701,7 +12701,7 @@ // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -12736,7 +12736,7 @@ // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -12771,7 +12771,7 @@ // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -12797,7 +12797,7 @@ // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -12832,7 +12832,7 @@ // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -12867,7 +12867,7 @@ // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -12902,7 +12902,7 @@ // CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0 @@ -12937,7 +12937,7 @@ // CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0 @@ -12972,7 +12972,7 @@ // CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0 @@ -13007,7 +13007,7 @@ // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -13033,7 +13033,7 @@ // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -13070,7 +13070,7 @@ // CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 32, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint8x16x2_t [[TMP4]] uint8x16x2_t test_vld1q_u8_x2(uint8_t const *a) { @@ -13088,7 +13088,7 @@ // CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint16x8x2_t [[TMP6]] uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) { @@ -13106,7 +13106,7 @@ // CHECK: store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint32x4x2_t [[TMP6]] uint32x4x2_t test_vld1q_u32_x2(uint32_t const *a) { @@ -13124,7 +13124,7 @@ // CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x2_t [[TMP6]] uint64x2x2_t test_vld1q_u64_x2(uint64_t const *a) { @@ -13140,7 +13140,7 @@ // CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 32, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int8x16x2_t [[TMP4]] int8x16x2_t test_vld1q_s8_x2(int8_t const *a) { @@ -13158,7 +13158,7 @@ // CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int16x8x2_t [[TMP6]] int16x8x2_t test_vld1q_s16_x2(int16_t const *a) { @@ -13176,7 +13176,7 @@ // CHECK: store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int32x4x2_t [[TMP6]] int32x4x2_t test_vld1q_s32_x2(int32_t const *a) { @@ -13194,7 +13194,7 @@ // CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x2_t [[TMP6]] int64x2x2_t test_vld1q_s64_x2(int64_t const *a) { @@ -13212,7 +13212,7 @@ // CHECK: store { <8 x half>, <8 x half> } [[VLD1XN]], { <8 x half>, <8 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.float16x8x2_t [[TMP6]] float16x8x2_t test_vld1q_f16_x2(float16_t const *a) { @@ -13230,7 +13230,7 @@ // CHECK: store { <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.float32x4x2_t [[TMP6]] float32x4x2_t test_vld1q_f32_x2(float32_t const *a) { @@ -13248,7 +13248,7 @@ // CHECK: store { <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x2_t [[TMP6]] float64x2x2_t test_vld1q_f64_x2(float64_t const *a) { @@ -13264,7 +13264,7 @@ // CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 32, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly8x16x2_t [[TMP4]] poly8x16x2_t test_vld1q_p8_x2(poly8_t const *a) { @@ -13282,7 +13282,7 @@ // CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly16x8x2_t [[TMP6]] poly16x8x2_t test_vld1q_p16_x2(poly16_t const *a) { @@ -13300,7 +13300,7 @@ // CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x2_t [[TMP6]] poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) { @@ -13316,7 +13316,7 @@ // CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint8x8x2_t [[TMP4]] uint8x8x2_t test_vld1_u8_x2(uint8_t const *a) { @@ -13334,7 +13334,7 @@ // CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint16x4x2_t [[TMP6]] uint16x4x2_t test_vld1_u16_x2(uint16_t const *a) { @@ -13352,7 +13352,7 @@ // CHECK: store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint32x2x2_t [[TMP6]] uint32x2x2_t test_vld1_u32_x2(uint32_t const *a) { @@ -13370,7 +13370,7 @@ // CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint64x1x2_t [[TMP6]] uint64x1x2_t test_vld1_u64_x2(uint64_t const *a) { @@ -13386,7 +13386,7 @@ // CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int8x8x2_t [[TMP4]] int8x8x2_t test_vld1_s8_x2(int8_t const *a) { @@ -13404,7 +13404,7 @@ // CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int16x4x2_t [[TMP6]] int16x4x2_t test_vld1_s16_x2(int16_t const *a) { @@ -13422,7 +13422,7 @@ // CHECK: store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int32x2x2_t [[TMP6]] int32x2x2_t test_vld1_s32_x2(int32_t const *a) { @@ -13440,7 +13440,7 @@ // CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int64x1x2_t [[TMP6]] int64x1x2_t test_vld1_s64_x2(int64_t const *a) { @@ -13458,7 +13458,7 @@ // CHECK: store { <4 x half>, <4 x half> } [[VLD1XN]], { <4 x half>, <4 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.float16x4x2_t [[TMP6]] float16x4x2_t test_vld1_f16_x2(float16_t const *a) { @@ -13476,7 +13476,7 @@ // CHECK: store { <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.float32x2x2_t [[TMP6]] float32x2x2_t test_vld1_f32_x2(float32_t const *a) { @@ -13494,7 +13494,7 @@ // CHECK: store { <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x2_t [[TMP6]] float64x1x2_t test_vld1_f64_x2(float64_t const *a) { @@ -13510,7 +13510,7 @@ // CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly8x8x2_t [[TMP4]] poly8x8x2_t test_vld1_p8_x2(poly8_t const *a) { @@ -13528,7 +13528,7 @@ // CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly16x4x2_t [[TMP6]] poly16x4x2_t test_vld1_p16_x2(poly16_t const *a) { @@ -13546,7 +13546,7 @@ // CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x2_t [[TMP6]] poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) { @@ -13562,7 +13562,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 48, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint8x16x3_t [[TMP4]] uint8x16x3_t test_vld1q_u8_x3(uint8_t const *a) { @@ -13580,7 +13580,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint16x8x3_t [[TMP6]] uint16x8x3_t test_vld1q_u16_x3(uint16_t const *a) { @@ -13598,7 +13598,7 @@ // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint32x4x3_t [[TMP6]] uint32x4x3_t test_vld1q_u32_x3(uint32_t const *a) { @@ -13616,7 +13616,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x3_t [[TMP6]] uint64x2x3_t test_vld1q_u64_x3(uint64_t const *a) { @@ -13632,7 +13632,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 48, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int8x16x3_t [[TMP4]] int8x16x3_t test_vld1q_s8_x3(int8_t const *a) { @@ -13650,7 +13650,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int16x8x3_t [[TMP6]] int16x8x3_t test_vld1q_s16_x3(int16_t const *a) { @@ -13668,7 +13668,7 @@ // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int32x4x3_t [[TMP6]] int32x4x3_t test_vld1q_s32_x3(int32_t const *a) { @@ -13686,7 +13686,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x3_t [[TMP6]] int64x2x3_t test_vld1q_s64_x3(int64_t const *a) { @@ -13704,7 +13704,7 @@ // CHECK: store { <8 x half>, <8 x half>, <8 x half> } [[VLD1XN]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.float16x8x3_t [[TMP6]] float16x8x3_t test_vld1q_f16_x3(float16_t const *a) { @@ -13722,7 +13722,7 @@ // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.float32x4x3_t [[TMP6]] float32x4x3_t test_vld1q_f32_x3(float32_t const *a) { @@ -13740,7 +13740,7 @@ // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x3_t [[TMP6]] float64x2x3_t test_vld1q_f64_x3(float64_t const *a) { @@ -13756,7 +13756,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 48, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly8x16x3_t [[TMP4]] poly8x16x3_t test_vld1q_p8_x3(poly8_t const *a) { @@ -13774,7 +13774,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly16x8x3_t [[TMP6]] poly16x8x3_t test_vld1q_p16_x3(poly16_t const *a) { @@ -13792,7 +13792,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x3_t [[TMP6]] poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) { @@ -13808,7 +13808,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 24, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint8x8x3_t [[TMP4]] uint8x8x3_t test_vld1_u8_x3(uint8_t const *a) { @@ -13826,7 +13826,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint16x4x3_t [[TMP6]] uint16x4x3_t test_vld1_u16_x3(uint16_t const *a) { @@ -13844,7 +13844,7 @@ // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint32x2x3_t [[TMP6]] uint32x2x3_t test_vld1_u32_x3(uint32_t const *a) { @@ -13862,7 +13862,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint64x1x3_t [[TMP6]] uint64x1x3_t test_vld1_u64_x3(uint64_t const *a) { @@ -13878,7 +13878,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 24, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int8x8x3_t [[TMP4]] int8x8x3_t test_vld1_s8_x3(int8_t const *a) { @@ -13896,7 +13896,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int16x4x3_t [[TMP6]] int16x4x3_t test_vld1_s16_x3(int16_t const *a) { @@ -13914,7 +13914,7 @@ // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int32x2x3_t [[TMP6]] int32x2x3_t test_vld1_s32_x3(int32_t const *a) { @@ -13932,7 +13932,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int64x1x3_t [[TMP6]] int64x1x3_t test_vld1_s64_x3(int64_t const *a) { @@ -13950,7 +13950,7 @@ // CHECK: store { <4 x half>, <4 x half>, <4 x half> } [[VLD1XN]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.float16x4x3_t [[TMP6]] float16x4x3_t test_vld1_f16_x3(float16_t const *a) { @@ -13968,7 +13968,7 @@ // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.float32x2x3_t [[TMP6]] float32x2x3_t test_vld1_f32_x3(float32_t const *a) { @@ -13986,7 +13986,7 @@ // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x3_t [[TMP6]] float64x1x3_t test_vld1_f64_x3(float64_t const *a) { @@ -14002,7 +14002,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 24, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly8x8x3_t [[TMP4]] poly8x8x3_t test_vld1_p8_x3(poly8_t const *a) { @@ -14020,7 +14020,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly16x4x3_t [[TMP6]] poly16x4x3_t test_vld1_p16_x3(poly16_t const *a) { @@ -14038,7 +14038,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x3_t [[TMP6]] poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) { @@ -14054,7 +14054,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 64, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint8x16x4_t [[TMP4]] uint8x16x4_t test_vld1q_u8_x4(uint8_t const *a) { @@ -14072,7 +14072,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint16x8x4_t [[TMP6]] uint16x8x4_t test_vld1q_u16_x4(uint16_t const *a) { @@ -14090,7 +14090,7 @@ // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint32x4x4_t [[TMP6]] uint32x4x4_t test_vld1q_u32_x4(uint32_t const *a) { @@ -14108,7 +14108,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x4_t [[TMP6]] uint64x2x4_t test_vld1q_u64_x4(uint64_t const *a) { @@ -14124,7 +14124,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 64, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int8x16x4_t [[TMP4]] int8x16x4_t test_vld1q_s8_x4(int8_t const *a) { @@ -14142,7 +14142,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int16x8x4_t [[TMP6]] int16x8x4_t test_vld1q_s16_x4(int16_t const *a) { @@ -14160,7 +14160,7 @@ // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int32x4x4_t [[TMP6]] int32x4x4_t test_vld1q_s32_x4(int32_t const *a) { @@ -14178,7 +14178,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x4_t [[TMP6]] int64x2x4_t test_vld1q_s64_x4(int64_t const *a) { @@ -14196,7 +14196,7 @@ // CHECK: store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD1XN]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.float16x8x4_t [[TMP6]] float16x8x4_t test_vld1q_f16_x4(float16_t const *a) { @@ -14214,7 +14214,7 @@ // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.float32x4x4_t [[TMP6]] float32x4x4_t test_vld1q_f32_x4(float32_t const *a) { @@ -14232,7 +14232,7 @@ // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x4_t [[TMP6]] float64x2x4_t test_vld1q_f64_x4(float64_t const *a) { @@ -14248,7 +14248,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 64, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly8x16x4_t [[TMP4]] poly8x16x4_t test_vld1q_p8_x4(poly8_t const *a) { @@ -14266,7 +14266,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly16x8x4_t [[TMP6]] poly16x8x4_t test_vld1q_p16_x4(poly16_t const *a) { @@ -14284,7 +14284,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x4_t [[TMP6]] poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) { @@ -14300,7 +14300,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 32, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint8x8x4_t [[TMP4]] uint8x8x4_t test_vld1_u8_x4(uint8_t const *a) { @@ -14318,7 +14318,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint16x4x4_t [[TMP6]] uint16x4x4_t test_vld1_u16_x4(uint16_t const *a) { @@ -14336,7 +14336,7 @@ // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint32x2x4_t [[TMP6]] uint32x2x4_t test_vld1_u32_x4(uint32_t const *a) { @@ -14354,7 +14354,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint64x1x4_t [[TMP6]] uint64x1x4_t test_vld1_u64_x4(uint64_t const *a) { @@ -14370,7 +14370,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 32, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int8x8x4_t [[TMP4]] int8x8x4_t test_vld1_s8_x4(int8_t const *a) { @@ -14388,7 +14388,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int16x4x4_t [[TMP6]] int16x4x4_t test_vld1_s16_x4(int16_t const *a) { @@ -14406,7 +14406,7 @@ // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int32x2x4_t [[TMP6]] int32x2x4_t test_vld1_s32_x4(int32_t const *a) { @@ -14424,7 +14424,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int64x1x4_t [[TMP6]] int64x1x4_t test_vld1_s64_x4(int64_t const *a) { @@ -14442,7 +14442,7 @@ // CHECK: store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD1XN]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.float16x4x4_t [[TMP6]] float16x4x4_t test_vld1_f16_x4(float16_t const *a) { @@ -14460,7 +14460,7 @@ // CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.float32x2x4_t [[TMP6]] float32x2x4_t test_vld1_f32_x4(float32_t const *a) { @@ -14478,7 +14478,7 @@ // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x4_t [[TMP6]] float64x1x4_t test_vld1_f64_x4(float64_t const *a) { @@ -14494,7 +14494,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 32, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly8x8x4_t [[TMP4]] poly8x8x4_t test_vld1_p8_x4(poly8_t const *a) { @@ -14512,7 +14512,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly16x4x4_t [[TMP6]] poly16x4x4_t test_vld1_p16_x4(poly16_t const *a) { @@ -14530,7 +14530,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x4_t [[TMP6]] poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) { @@ -14544,7 +14544,7 @@ // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -14564,7 +14564,7 @@ // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -14590,7 +14590,7 @@ // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -14616,7 +14616,7 @@ // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -14642,7 +14642,7 @@ // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -14662,7 +14662,7 @@ // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -14688,7 +14688,7 @@ // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -14714,7 +14714,7 @@ // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -14740,7 +14740,7 @@ // CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0 @@ -14766,7 +14766,7 @@ // CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0 @@ -14792,7 +14792,7 @@ // CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0 @@ -14818,7 +14818,7 @@ // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -14838,7 +14838,7 @@ // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -14864,7 +14864,7 @@ // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -14890,7 +14890,7 @@ // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -14910,7 +14910,7 @@ // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -14936,7 +14936,7 @@ // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -14962,7 +14962,7 @@ // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -14988,7 +14988,7 @@ // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -15008,7 +15008,7 @@ // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -15034,7 +15034,7 @@ // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -15060,7 +15060,7 @@ // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -15086,7 +15086,7 @@ // CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0 @@ -15112,7 +15112,7 @@ // CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0 @@ -15138,7 +15138,7 @@ // CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0 @@ -15164,7 +15164,7 @@ // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -15184,7 +15184,7 @@ // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -15210,7 +15210,7 @@ // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -15236,7 +15236,7 @@ // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -15259,7 +15259,7 @@ // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -15290,7 +15290,7 @@ // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -15321,7 +15321,7 @@ // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -15352,7 +15352,7 @@ // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -15375,7 +15375,7 @@ // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -15406,7 +15406,7 @@ // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -15437,7 +15437,7 @@ // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -15468,7 +15468,7 @@ // CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0 @@ -15499,7 +15499,7 @@ // CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0 @@ -15530,7 +15530,7 @@ // CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0 @@ -15561,7 +15561,7 @@ // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -15584,7 +15584,7 @@ // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -15615,7 +15615,7 @@ // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -15646,7 +15646,7 @@ // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -15669,7 +15669,7 @@ // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -15700,7 +15700,7 @@ // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -15731,7 +15731,7 @@ // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -15762,7 +15762,7 @@ // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -15785,7 +15785,7 @@ // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -15816,7 +15816,7 @@ // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -15847,7 +15847,7 @@ // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -15878,7 +15878,7 @@ // CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0 @@ -15909,7 +15909,7 @@ // CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0 @@ -15940,7 +15940,7 @@ // CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0 @@ -15971,7 +15971,7 @@ // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -15994,7 +15994,7 @@ // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -16025,7 +16025,7 @@ // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -16056,7 +16056,7 @@ // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -16082,7 +16082,7 @@ // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -16118,7 +16118,7 @@ // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -16154,7 +16154,7 @@ // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -16190,7 +16190,7 @@ // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -16216,7 +16216,7 @@ // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -16252,7 +16252,7 @@ // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -16288,7 +16288,7 @@ // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -16324,7 +16324,7 @@ // CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0 @@ -16360,7 +16360,7 @@ // CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0 @@ -16396,7 +16396,7 @@ // CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0 @@ -16432,7 +16432,7 @@ // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -16458,7 +16458,7 @@ // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -16494,7 +16494,7 @@ // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -16530,7 +16530,7 @@ // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -16556,7 +16556,7 @@ // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -16592,7 +16592,7 @@ // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -16628,7 +16628,7 @@ // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -16664,7 +16664,7 @@ // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -16690,7 +16690,7 @@ // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -16726,7 +16726,7 @@ // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -16762,7 +16762,7 @@ // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -16798,7 +16798,7 @@ // CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0 @@ -16834,7 +16834,7 @@ // CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0 @@ -16870,7 +16870,7 @@ // CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0 @@ -16906,7 +16906,7 @@ // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -16932,7 +16932,7 @@ // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -16968,7 +16968,7 @@ // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 Index: test/CodeGen/aarch64-neon-ldst-one.c =================================================================== --- test/CodeGen/aarch64-neon-ldst-one.c +++ test/CodeGen/aarch64-neon-ldst-one.c @@ -309,7 +309,7 @@ // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 32, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint8x16x2_t [[TMP4]] uint8x16x2_t test_vld2q_dup_u8(uint8_t *a) { @@ -327,7 +327,7 @@ // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint16x8x2_t [[TMP6]] uint16x8x2_t test_vld2q_dup_u16(uint16_t *a) { @@ -345,7 +345,7 @@ // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint32x4x2_t [[TMP6]] uint32x4x2_t test_vld2q_dup_u32(uint32_t *a) { @@ -363,7 +363,7 @@ // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x2_t [[TMP6]] uint64x2x2_t test_vld2q_dup_u64(uint64_t *a) { @@ -379,7 +379,7 @@ // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 32, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int8x16x2_t [[TMP4]] int8x16x2_t test_vld2q_dup_s8(int8_t *a) { @@ -397,7 +397,7 @@ // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int16x8x2_t [[TMP6]] int16x8x2_t test_vld2q_dup_s16(int16_t *a) { @@ -415,7 +415,7 @@ // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int32x4x2_t [[TMP6]] int32x4x2_t test_vld2q_dup_s32(int32_t *a) { @@ -433,7 +433,7 @@ // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x2_t [[TMP6]] int64x2x2_t test_vld2q_dup_s64(int64_t *a) { @@ -451,7 +451,7 @@ // CHECK: store { <8 x half>, <8 x half> } [[VLD2]], { <8 x half>, <8 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.float16x8x2_t [[TMP6]] float16x8x2_t test_vld2q_dup_f16(float16_t *a) { @@ -469,7 +469,7 @@ // CHECK: store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.float32x4x2_t [[TMP6]] float32x4x2_t test_vld2q_dup_f32(float32_t *a) { @@ -487,7 +487,7 @@ // CHECK: store { <2 x double>, <2 x double> } [[VLD2]], { <2 x double>, <2 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x2_t [[TMP6]] float64x2x2_t test_vld2q_dup_f64(float64_t *a) { @@ -503,7 +503,7 @@ // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 32, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly8x16x2_t [[TMP4]] poly8x16x2_t test_vld2q_dup_p8(poly8_t *a) { @@ -521,7 +521,7 @@ // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly16x8x2_t [[TMP6]] poly16x8x2_t test_vld2q_dup_p16(poly16_t *a) { @@ -539,7 +539,7 @@ // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x2_t [[TMP6]] poly64x2x2_t test_vld2q_dup_p64(poly64_t *a) { @@ -555,7 +555,7 @@ // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint8x8x2_t [[TMP4]] uint8x8x2_t test_vld2_dup_u8(uint8_t *a) { @@ -573,7 +573,7 @@ // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint16x4x2_t [[TMP6]] uint16x4x2_t test_vld2_dup_u16(uint16_t *a) { @@ -591,7 +591,7 @@ // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint32x2x2_t [[TMP6]] uint32x2x2_t test_vld2_dup_u32(uint32_t *a) { @@ -609,7 +609,7 @@ // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint64x1x2_t [[TMP6]] uint64x1x2_t test_vld2_dup_u64(uint64_t *a) { @@ -625,7 +625,7 @@ // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int8x8x2_t [[TMP4]] int8x8x2_t test_vld2_dup_s8(int8_t *a) { @@ -643,7 +643,7 @@ // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int16x4x2_t [[TMP6]] int16x4x2_t test_vld2_dup_s16(int16_t *a) { @@ -661,7 +661,7 @@ // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int32x2x2_t [[TMP6]] int32x2x2_t test_vld2_dup_s32(int32_t *a) { @@ -679,7 +679,7 @@ // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int64x1x2_t [[TMP6]] int64x1x2_t test_vld2_dup_s64(int64_t *a) { @@ -697,7 +697,7 @@ // CHECK: store { <4 x half>, <4 x half> } [[VLD2]], { <4 x half>, <4 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.float16x4x2_t [[TMP6]] float16x4x2_t test_vld2_dup_f16(float16_t *a) { @@ -715,7 +715,7 @@ // CHECK: store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.float32x2x2_t [[TMP6]] float32x2x2_t test_vld2_dup_f32(float32_t *a) { @@ -733,7 +733,7 @@ // CHECK: store { <1 x double>, <1 x double> } [[VLD2]], { <1 x double>, <1 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x2_t [[TMP6]] float64x1x2_t test_vld2_dup_f64(float64_t *a) { @@ -749,7 +749,7 @@ // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly8x8x2_t [[TMP4]] poly8x8x2_t test_vld2_dup_p8(poly8_t *a) { @@ -767,7 +767,7 @@ // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly16x4x2_t [[TMP6]] poly16x4x2_t test_vld2_dup_p16(poly16_t *a) { @@ -785,7 +785,7 @@ // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x2_t [[TMP6]] poly64x1x2_t test_vld2_dup_p64(poly64_t *a) { @@ -801,7 +801,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 48, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint8x16x3_t [[TMP4]] uint8x16x3_t test_vld3q_dup_u8(uint8_t *a) { @@ -820,7 +820,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint16x8x3_t [[TMP6]] uint16x8x3_t test_vld3q_dup_u16(uint16_t *a) { @@ -839,7 +839,7 @@ // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint32x4x3_t [[TMP6]] uint32x4x3_t test_vld3q_dup_u32(uint32_t *a) { @@ -858,7 +858,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x3_t [[TMP6]] uint64x2x3_t test_vld3q_dup_u64(uint64_t *a) { @@ -875,7 +875,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 48, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int8x16x3_t [[TMP4]] int8x16x3_t test_vld3q_dup_s8(int8_t *a) { @@ -894,7 +894,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int16x8x3_t [[TMP6]] int16x8x3_t test_vld3q_dup_s16(int16_t *a) { @@ -913,7 +913,7 @@ // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int32x4x3_t [[TMP6]] int32x4x3_t test_vld3q_dup_s32(int32_t *a) { @@ -932,7 +932,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x3_t [[TMP6]] int64x2x3_t test_vld3q_dup_s64(int64_t *a) { @@ -951,7 +951,7 @@ // CHECK: store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.float16x8x3_t [[TMP6]] float16x8x3_t test_vld3q_dup_f16(float16_t *a) { @@ -970,7 +970,7 @@ // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.float32x4x3_t [[TMP6]] float32x4x3_t test_vld3q_dup_f32(float32_t *a) { @@ -989,7 +989,7 @@ // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x3_t [[TMP6]] float64x2x3_t test_vld3q_dup_f64(float64_t *a) { @@ -1006,7 +1006,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 48, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly8x16x3_t [[TMP4]] poly8x16x3_t test_vld3q_dup_p8(poly8_t *a) { @@ -1025,7 +1025,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly16x8x3_t [[TMP6]] poly16x8x3_t test_vld3q_dup_p16(poly16_t *a) { @@ -1044,7 +1044,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x3_t [[TMP6]] poly64x2x3_t test_vld3q_dup_p64(poly64_t *a) { @@ -1061,7 +1061,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 24, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint8x8x3_t [[TMP4]] uint8x8x3_t test_vld3_dup_u8(uint8_t *a) { @@ -1080,7 +1080,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint16x4x3_t [[TMP6]] uint16x4x3_t test_vld3_dup_u16(uint16_t *a) { @@ -1099,7 +1099,7 @@ // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint32x2x3_t [[TMP6]] uint32x2x3_t test_vld3_dup_u32(uint32_t *a) { @@ -1118,7 +1118,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint64x1x3_t [[TMP6]] uint64x1x3_t test_vld3_dup_u64(uint64_t *a) { @@ -1135,7 +1135,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 24, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int8x8x3_t [[TMP4]] int8x8x3_t test_vld3_dup_s8(int8_t *a) { @@ -1154,7 +1154,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int16x4x3_t [[TMP6]] int16x4x3_t test_vld3_dup_s16(int16_t *a) { @@ -1173,7 +1173,7 @@ // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int32x2x3_t [[TMP6]] int32x2x3_t test_vld3_dup_s32(int32_t *a) { @@ -1192,7 +1192,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int64x1x3_t [[TMP6]] int64x1x3_t test_vld3_dup_s64(int64_t *a) { @@ -1211,7 +1211,7 @@ // CHECK: store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.float16x4x3_t [[TMP6]] float16x4x3_t test_vld3_dup_f16(float16_t *a) { @@ -1230,7 +1230,7 @@ // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.float32x2x3_t [[TMP6]] float32x2x3_t test_vld3_dup_f32(float32_t *a) { @@ -1249,7 +1249,7 @@ // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x3_t [[TMP6]] float64x1x3_t test_vld3_dup_f64(float64_t *a) { @@ -1266,7 +1266,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 24, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly8x8x3_t [[TMP4]] poly8x8x3_t test_vld3_dup_p8(poly8_t *a) { @@ -1285,7 +1285,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly16x4x3_t [[TMP6]] poly16x4x3_t test_vld3_dup_p16(poly16_t *a) { @@ -1304,7 +1304,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x3_t [[TMP6]] poly64x1x3_t test_vld3_dup_p64(poly64_t *a) { @@ -1321,7 +1321,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 64, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint8x16x4_t [[TMP4]] uint8x16x4_t test_vld4q_dup_u8(uint8_t *a) { @@ -1339,7 +1339,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint16x8x4_t [[TMP6]] uint16x8x4_t test_vld4q_dup_u16(uint16_t *a) { @@ -1357,7 +1357,7 @@ // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint32x4x4_t [[TMP6]] uint32x4x4_t test_vld4q_dup_u32(uint32_t *a) { @@ -1375,7 +1375,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x4_t [[TMP6]] uint64x2x4_t test_vld4q_dup_u64(uint64_t *a) { @@ -1391,7 +1391,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 64, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int8x16x4_t [[TMP4]] int8x16x4_t test_vld4q_dup_s8(int8_t *a) { @@ -1409,7 +1409,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int16x8x4_t [[TMP6]] int16x8x4_t test_vld4q_dup_s16(int16_t *a) { @@ -1427,7 +1427,7 @@ // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int32x4x4_t [[TMP6]] int32x4x4_t test_vld4q_dup_s32(int32_t *a) { @@ -1445,7 +1445,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x4_t [[TMP6]] int64x2x4_t test_vld4q_dup_s64(int64_t *a) { @@ -1463,7 +1463,7 @@ // CHECK: store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.float16x8x4_t [[TMP6]] float16x8x4_t test_vld4q_dup_f16(float16_t *a) { @@ -1481,7 +1481,7 @@ // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.float32x4x4_t [[TMP6]] float32x4x4_t test_vld4q_dup_f32(float32_t *a) { @@ -1499,7 +1499,7 @@ // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x4_t [[TMP6]] float64x2x4_t test_vld4q_dup_f64(float64_t *a) { @@ -1515,7 +1515,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 64, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly8x16x4_t [[TMP4]] poly8x16x4_t test_vld4q_dup_p8(poly8_t *a) { @@ -1533,7 +1533,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly16x8x4_t [[TMP6]] poly16x8x4_t test_vld4q_dup_p16(poly16_t *a) { @@ -1551,7 +1551,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x4_t [[TMP6]] poly64x2x4_t test_vld4q_dup_p64(poly64_t *a) { @@ -1567,7 +1567,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 32, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint8x8x4_t [[TMP4]] uint8x8x4_t test_vld4_dup_u8(uint8_t *a) { @@ -1585,7 +1585,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint16x4x4_t [[TMP6]] uint16x4x4_t test_vld4_dup_u16(uint16_t *a) { @@ -1603,7 +1603,7 @@ // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint32x2x4_t [[TMP6]] uint32x2x4_t test_vld4_dup_u32(uint32_t *a) { @@ -1621,7 +1621,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint64x1x4_t [[TMP6]] uint64x1x4_t test_vld4_dup_u64(uint64_t *a) { @@ -1637,7 +1637,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 32, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int8x8x4_t [[TMP4]] int8x8x4_t test_vld4_dup_s8(int8_t *a) { @@ -1655,7 +1655,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int16x4x4_t [[TMP6]] int16x4x4_t test_vld4_dup_s16(int16_t *a) { @@ -1673,7 +1673,7 @@ // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int32x2x4_t [[TMP6]] int32x2x4_t test_vld4_dup_s32(int32_t *a) { @@ -1691,7 +1691,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int64x1x4_t [[TMP6]] int64x1x4_t test_vld4_dup_s64(int64_t *a) { @@ -1709,7 +1709,7 @@ // CHECK: store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.float16x4x4_t [[TMP6]] float16x4x4_t test_vld4_dup_f16(float16_t *a) { @@ -1727,7 +1727,7 @@ // CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.float32x2x4_t [[TMP6]] float32x2x4_t test_vld4_dup_f32(float32_t *a) { @@ -1745,7 +1745,7 @@ // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x4_t [[TMP6]] float64x1x4_t test_vld4_dup_f64(float64_t *a) { @@ -1761,7 +1761,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 32, i1 false) // CHECK: [[TMP4:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly8x8x4_t [[TMP4]] poly8x8x4_t test_vld4_dup_p8(poly8_t *a) { @@ -1779,7 +1779,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly16x4x4_t [[TMP6]] poly16x4x4_t test_vld4_dup_p16(poly16_t *a) { @@ -1797,7 +1797,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x4_t [[TMP6]] poly64x1x4_t test_vld4_dup_p64(poly64_t *a) { @@ -2125,7 +2125,7 @@ // CHECK: store [2 x <16 x i8>] [[SRC]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[SRC]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -2138,7 +2138,7 @@ // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], { <16 x i8>, <16 x i8> }* [[TMP5]] // CHECK: [[TMP6:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8* // CHECK: [[TMP7:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP6]], i8* align 16 [[TMP7]], i64 32, i1 false) // CHECK: [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int8x16x2_t [[TMP8]] int8x16x2_t test_vld2q_lane_s8(int8_t const * ptr, int8x16x2_t src) { @@ -2154,7 +2154,7 @@ // CHECK: store [2 x <16 x i8>] [[SRC]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[SRC]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -2167,7 +2167,7 @@ // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], { <16 x i8>, <16 x i8> }* [[TMP5]] // CHECK: [[TMP6:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8* // CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP6]], i8* align 16 [[TMP7]], i64 32, i1 false) // CHECK: [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint8x16x2_t [[TMP8]] uint8x16x2_t test_vld2q_lane_u8(uint8_t const * ptr, uint8x16x2_t src) { @@ -2183,7 +2183,7 @@ // CHECK: store [2 x <16 x i8>] [[SRC]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[SRC]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -2196,7 +2196,7 @@ // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], { <16 x i8>, <16 x i8> }* [[TMP5]] // CHECK: [[TMP6:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8* // CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP6]], i8* align 16 [[TMP7]], i64 32, i1 false) // CHECK: [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly8x16x2_t [[TMP8]] poly8x16x2_t test_vld2q_lane_p8(poly8_t const * ptr, poly8x16x2_t src) { @@ -2212,7 +2212,7 @@ // CHECK: store [3 x <16 x i8>] [[SRC]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[SRC]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -2228,7 +2228,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP6]] // CHECK: [[TMP7:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8* // CHECK: [[TMP8:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP7]], i8* align 16 [[TMP8]], i64 48, i1 false) // CHECK: [[TMP9:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int8x16x3_t [[TMP9]] int8x16x3_t test_vld3q_lane_s8(int8_t const * ptr, int8x16x3_t src) { @@ -2244,7 +2244,7 @@ // CHECK: store [3 x <16 x i8>] [[SRC]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[SRC]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -2260,7 +2260,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP6]] // CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8* // CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP7]], i8* align 16 [[TMP8]], i64 48, i1 false) // CHECK: [[TMP9:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint8x16x3_t [[TMP9]] uint8x16x3_t test_vld3q_lane_u8(uint8_t const * ptr, uint8x16x3_t src) { @@ -2276,7 +2276,7 @@ // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 @@ -2294,7 +2294,7 @@ // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint16x8x2_t [[TMP13]] uint16x8x2_t test_vld2q_lane_u16(uint16_t *a, uint16x8x2_t b) { @@ -2310,7 +2310,7 @@ // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 @@ -2328,7 +2328,7 @@ // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], { <4 x i32>, <4 x i32> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint32x4x2_t [[TMP13]] uint32x4x2_t test_vld2q_lane_u32(uint32_t *a, uint32x4x2_t b) { @@ -2344,7 +2344,7 @@ // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 @@ -2362,7 +2362,7 @@ // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x2_t [[TMP13]] uint64x2x2_t test_vld2q_lane_u64(uint64_t *a, uint64x2x2_t b) { @@ -2378,7 +2378,7 @@ // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 @@ -2396,7 +2396,7 @@ // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int16x8x2_t [[TMP13]] int16x8x2_t test_vld2q_lane_s16(int16_t *a, int16x8x2_t b) { @@ -2412,7 +2412,7 @@ // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 @@ -2430,7 +2430,7 @@ // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], { <4 x i32>, <4 x i32> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int32x4x2_t [[TMP13]] int32x4x2_t test_vld2q_lane_s32(int32_t *a, int32x4x2_t b) { @@ -2446,7 +2446,7 @@ // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 @@ -2464,7 +2464,7 @@ // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x2_t [[TMP13]] int64x2x2_t test_vld2q_lane_s64(int64_t *a, int64x2x2_t b) { @@ -2480,7 +2480,7 @@ // CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 @@ -2498,7 +2498,7 @@ // CHECK: store { <8 x half>, <8 x half> } [[VLD2_LANE]], { <8 x half>, <8 x half> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.float16x8x2_t [[TMP13]] float16x8x2_t test_vld2q_lane_f16(float16_t *a, float16x8x2_t b) { @@ -2514,7 +2514,7 @@ // CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 @@ -2532,7 +2532,7 @@ // CHECK: store { <4 x float>, <4 x float> } [[VLD2_LANE]], { <4 x float>, <4 x float> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.float32x4x2_t [[TMP13]] float32x4x2_t test_vld2q_lane_f32(float32_t *a, float32x4x2_t b) { @@ -2548,7 +2548,7 @@ // CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0 @@ -2566,7 +2566,7 @@ // CHECK: store { <2 x double>, <2 x double> } [[VLD2_LANE]], { <2 x double>, <2 x double> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x2_t [[TMP13]] float64x2x2_t test_vld2q_lane_f64(float64_t *a, float64x2x2_t b) { @@ -2582,7 +2582,7 @@ // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 @@ -2600,7 +2600,7 @@ // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly16x8x2_t [[TMP13]] poly16x8x2_t test_vld2q_lane_p16(poly16_t *a, poly16x8x2_t b) { @@ -2616,7 +2616,7 @@ // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0 @@ -2634,7 +2634,7 @@ // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x2_t [[TMP13]] poly64x2x2_t test_vld2q_lane_p64(poly64_t *a, poly64x2x2_t b) { @@ -2650,7 +2650,7 @@ // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -2663,7 +2663,7 @@ // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]] // CHECK: [[TMP6:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 [[TMP7]], i64 16, i1 false) // CHECK: [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint8x8x2_t [[TMP8]] uint8x8x2_t test_vld2_lane_u8(uint8_t *a, uint8x8x2_t b) { @@ -2679,7 +2679,7 @@ // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 @@ -2697,7 +2697,7 @@ // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint16x4x2_t [[TMP13]] uint16x4x2_t test_vld2_lane_u16(uint16_t *a, uint16x4x2_t b) { @@ -2713,7 +2713,7 @@ // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 @@ -2731,7 +2731,7 @@ // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], { <2 x i32>, <2 x i32> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint32x2x2_t [[TMP13]] uint32x2x2_t test_vld2_lane_u32(uint32_t *a, uint32x2x2_t b) { @@ -2747,7 +2747,7 @@ // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 @@ -2765,7 +2765,7 @@ // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint64x1x2_t [[TMP13]] uint64x1x2_t test_vld2_lane_u64(uint64_t *a, uint64x1x2_t b) { @@ -2781,7 +2781,7 @@ // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -2794,7 +2794,7 @@ // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]] // CHECK: [[TMP6:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP7:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 [[TMP7]], i64 16, i1 false) // CHECK: [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int8x8x2_t [[TMP8]] int8x8x2_t test_vld2_lane_s8(int8_t *a, int8x8x2_t b) { @@ -2810,7 +2810,7 @@ // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 @@ -2828,7 +2828,7 @@ // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int16x4x2_t [[TMP13]] int16x4x2_t test_vld2_lane_s16(int16_t *a, int16x4x2_t b) { @@ -2844,7 +2844,7 @@ // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 @@ -2862,7 +2862,7 @@ // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], { <2 x i32>, <2 x i32> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int32x2x2_t [[TMP13]] int32x2x2_t test_vld2_lane_s32(int32_t *a, int32x2x2_t b) { @@ -2878,7 +2878,7 @@ // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 @@ -2896,7 +2896,7 @@ // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int64x1x2_t [[TMP13]] int64x1x2_t test_vld2_lane_s64(int64_t *a, int64x1x2_t b) { @@ -2912,7 +2912,7 @@ // CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 @@ -2930,7 +2930,7 @@ // CHECK: store { <4 x half>, <4 x half> } [[VLD2_LANE]], { <4 x half>, <4 x half> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.float16x4x2_t [[TMP13]] float16x4x2_t test_vld2_lane_f16(float16_t *a, float16x4x2_t b) { @@ -2946,7 +2946,7 @@ // CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 @@ -2964,7 +2964,7 @@ // CHECK: store { <2 x float>, <2 x float> } [[VLD2_LANE]], { <2 x float>, <2 x float> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.float32x2x2_t [[TMP13]] float32x2x2_t test_vld2_lane_f32(float32_t *a, float32x2x2_t b) { @@ -2980,7 +2980,7 @@ // CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0 @@ -2998,7 +2998,7 @@ // CHECK: store { <1 x double>, <1 x double> } [[VLD2_LANE]], { <1 x double>, <1 x double> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x2_t [[TMP13]] float64x1x2_t test_vld2_lane_f64(float64_t *a, float64x1x2_t b) { @@ -3014,7 +3014,7 @@ // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -3027,7 +3027,7 @@ // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]] // CHECK: [[TMP6:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 [[TMP7]], i64 16, i1 false) // CHECK: [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly8x8x2_t [[TMP8]] poly8x8x2_t test_vld2_lane_p8(poly8_t *a, poly8x8x2_t b) { @@ -3043,7 +3043,7 @@ // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 @@ -3061,7 +3061,7 @@ // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly16x4x2_t [[TMP13]] poly16x4x2_t test_vld2_lane_p16(poly16_t *a, poly16x4x2_t b) { @@ -3077,7 +3077,7 @@ // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0 @@ -3095,7 +3095,7 @@ // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x2_t [[TMP13]] poly64x1x2_t test_vld2_lane_p64(poly64_t *a, poly64x1x2_t b) { @@ -3111,7 +3111,7 @@ // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 @@ -3134,7 +3134,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint16x8x3_t [[TMP16]] uint16x8x3_t test_vld3q_lane_u16(uint16_t *a, uint16x8x3_t b) { @@ -3150,7 +3150,7 @@ // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 @@ -3173,7 +3173,7 @@ // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint32x4x3_t [[TMP16]] uint32x4x3_t test_vld3q_lane_u32(uint32_t *a, uint32x4x3_t b) { @@ -3189,7 +3189,7 @@ // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 @@ -3212,7 +3212,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x3_t [[TMP16]] uint64x2x3_t test_vld3q_lane_u64(uint64_t *a, uint64x2x3_t b) { @@ -3228,7 +3228,7 @@ // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 @@ -3251,7 +3251,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int16x8x3_t [[TMP16]] int16x8x3_t test_vld3q_lane_s16(int16_t *a, int16x8x3_t b) { @@ -3267,7 +3267,7 @@ // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 @@ -3290,7 +3290,7 @@ // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int32x4x3_t [[TMP16]] int32x4x3_t test_vld3q_lane_s32(int32_t *a, int32x4x3_t b) { @@ -3306,7 +3306,7 @@ // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 @@ -3329,7 +3329,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x3_t [[TMP16]] int64x2x3_t test_vld3q_lane_s64(int64_t *a, int64x2x3_t b) { @@ -3345,7 +3345,7 @@ // CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 @@ -3368,7 +3368,7 @@ // CHECK: store { <8 x half>, <8 x half>, <8 x half> } [[VLD3_LANE]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.float16x8x3_t [[TMP16]] float16x8x3_t test_vld3q_lane_f16(float16_t *a, float16x8x3_t b) { @@ -3384,7 +3384,7 @@ // CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 @@ -3407,7 +3407,7 @@ // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3_LANE]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.float32x4x3_t [[TMP16]] float32x4x3_t test_vld3q_lane_f32(float32_t *a, float32x4x3_t b) { @@ -3423,7 +3423,7 @@ // CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 @@ -3446,7 +3446,7 @@ // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3_LANE]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x3_t [[TMP16]] float64x2x3_t test_vld3q_lane_f64(float64_t *a, float64x2x3_t b) { @@ -3462,7 +3462,7 @@ // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -3478,7 +3478,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP6]] // CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8* // CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP7]], i8* align 16 [[TMP8]], i64 48, i1 false) // CHECK: [[TMP9:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly8x16x3_t [[TMP9]] poly8x16x3_t test_vld3q_lane_p8(poly8_t *a, poly8x16x3_t b) { @@ -3494,7 +3494,7 @@ // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 @@ -3517,7 +3517,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly16x8x3_t [[TMP16]] poly16x8x3_t test_vld3q_lane_p16(poly16_t *a, poly16x8x3_t b) { @@ -3533,7 +3533,7 @@ // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 @@ -3556,7 +3556,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x3_t [[TMP16]] poly64x2x3_t test_vld3q_lane_p64(poly64_t *a, poly64x2x3_t b) { @@ -3572,7 +3572,7 @@ // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -3588,7 +3588,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]] // CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP7]], i8* align 8 [[TMP8]], i64 24, i1 false) // CHECK: [[TMP9:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint8x8x3_t [[TMP9]] uint8x8x3_t test_vld3_lane_u8(uint8_t *a, uint8x8x3_t b) { @@ -3604,7 +3604,7 @@ // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 @@ -3627,7 +3627,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint16x4x3_t [[TMP16]] uint16x4x3_t test_vld3_lane_u16(uint16_t *a, uint16x4x3_t b) { @@ -3643,7 +3643,7 @@ // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 @@ -3666,7 +3666,7 @@ // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint32x2x3_t [[TMP16]] uint32x2x3_t test_vld3_lane_u32(uint32_t *a, uint32x2x3_t b) { @@ -3682,7 +3682,7 @@ // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 @@ -3705,7 +3705,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint64x1x3_t [[TMP16]] uint64x1x3_t test_vld3_lane_u64(uint64_t *a, uint64x1x3_t b) { @@ -3721,7 +3721,7 @@ // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -3737,7 +3737,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]] // CHECK: [[TMP7:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP7]], i8* align 8 [[TMP8]], i64 24, i1 false) // CHECK: [[TMP9:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int8x8x3_t [[TMP9]] int8x8x3_t test_vld3_lane_s8(int8_t *a, int8x8x3_t b) { @@ -3753,7 +3753,7 @@ // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 @@ -3776,7 +3776,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int16x4x3_t [[TMP16]] int16x4x3_t test_vld3_lane_s16(int16_t *a, int16x4x3_t b) { @@ -3792,7 +3792,7 @@ // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 @@ -3815,7 +3815,7 @@ // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int32x2x3_t [[TMP16]] int32x2x3_t test_vld3_lane_s32(int32_t *a, int32x2x3_t b) { @@ -3831,7 +3831,7 @@ // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 @@ -3854,7 +3854,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int64x1x3_t [[TMP16]] int64x1x3_t test_vld3_lane_s64(int64_t *a, int64x1x3_t b) { @@ -3870,7 +3870,7 @@ // CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 @@ -3893,7 +3893,7 @@ // CHECK: store { <4 x half>, <4 x half>, <4 x half> } [[VLD3_LANE]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.float16x4x3_t [[TMP16]] float16x4x3_t test_vld3_lane_f16(float16_t *a, float16x4x3_t b) { @@ -3909,7 +3909,7 @@ // CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 @@ -3932,7 +3932,7 @@ // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.float32x2x3_t [[TMP16]] float32x2x3_t test_vld3_lane_f32(float32_t *a, float32x2x3_t b) { @@ -3948,7 +3948,7 @@ // CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 @@ -3971,7 +3971,7 @@ // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3_LANE]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x3_t [[TMP16]] float64x1x3_t test_vld3_lane_f64(float64_t *a, float64x1x3_t b) { @@ -3987,7 +3987,7 @@ // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -4003,7 +4003,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]] // CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP7]], i8* align 8 [[TMP8]], i64 24, i1 false) // CHECK: [[TMP9:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly8x8x3_t [[TMP9]] poly8x8x3_t test_vld3_lane_p8(poly8_t *a, poly8x8x3_t b) { @@ -4019,7 +4019,7 @@ // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 @@ -4042,7 +4042,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly16x4x3_t [[TMP16]] poly16x4x3_t test_vld3_lane_p16(poly16_t *a, poly16x4x3_t b) { @@ -4058,7 +4058,7 @@ // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0 @@ -4081,7 +4081,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x3_t [[TMP16]] poly64x1x3_t test_vld3_lane_p64(poly64_t *a, poly64x1x3_t b) { @@ -4097,7 +4097,7 @@ // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -4116,7 +4116,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP7]] // CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8* // CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP8]], i8* align 16 [[TMP9]], i64 64, i1 false) // CHECK: [[TMP10:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint8x16x4_t [[TMP10]] uint8x16x4_t test_vld4q_lane_u8(uint8_t *a, uint8x16x4_t b) { @@ -4132,7 +4132,7 @@ // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 @@ -4160,7 +4160,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint16x8x4_t [[TMP19]] uint16x8x4_t test_vld4q_lane_u16(uint16_t *a, uint16x8x4_t b) { @@ -4176,7 +4176,7 @@ // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 @@ -4204,7 +4204,7 @@ // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint32x4x4_t [[TMP19]] uint32x4x4_t test_vld4q_lane_u32(uint32_t *a, uint32x4x4_t b) { @@ -4220,7 +4220,7 @@ // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 @@ -4248,7 +4248,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x4_t [[TMP19]] uint64x2x4_t test_vld4q_lane_u64(uint64_t *a, uint64x2x4_t b) { @@ -4264,7 +4264,7 @@ // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -4283,7 +4283,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP7]] // CHECK: [[TMP8:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8* // CHECK: [[TMP9:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP8]], i8* align 16 [[TMP9]], i64 64, i1 false) // CHECK: [[TMP10:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int8x16x4_t [[TMP10]] int8x16x4_t test_vld4q_lane_s8(int8_t *a, int8x16x4_t b) { @@ -4299,7 +4299,7 @@ // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 @@ -4327,7 +4327,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int16x8x4_t [[TMP19]] int16x8x4_t test_vld4q_lane_s16(int16_t *a, int16x8x4_t b) { @@ -4343,7 +4343,7 @@ // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 @@ -4371,7 +4371,7 @@ // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int32x4x4_t [[TMP19]] int32x4x4_t test_vld4q_lane_s32(int32_t *a, int32x4x4_t b) { @@ -4387,7 +4387,7 @@ // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 @@ -4415,7 +4415,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x4_t [[TMP19]] int64x2x4_t test_vld4q_lane_s64(int64_t *a, int64x2x4_t b) { @@ -4431,7 +4431,7 @@ // CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 @@ -4459,7 +4459,7 @@ // CHECK: store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4_LANE]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.float16x8x4_t [[TMP19]] float16x8x4_t test_vld4q_lane_f16(float16_t *a, float16x8x4_t b) { @@ -4475,7 +4475,7 @@ // CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 @@ -4503,7 +4503,7 @@ // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4_LANE]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.float32x4x4_t [[TMP19]] float32x4x4_t test_vld4q_lane_f32(float32_t *a, float32x4x4_t b) { @@ -4519,7 +4519,7 @@ // CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 @@ -4547,7 +4547,7 @@ // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4_LANE]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x4_t [[TMP19]] float64x2x4_t test_vld4q_lane_f64(float64_t *a, float64x2x4_t b) { @@ -4563,7 +4563,7 @@ // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -4582,7 +4582,7 @@ // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP7]] // CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8* // CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP8]], i8* align 16 [[TMP9]], i64 64, i1 false) // CHECK: [[TMP10:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly8x16x4_t [[TMP10]] poly8x16x4_t test_vld4q_lane_p8(poly8_t *a, poly8x16x4_t b) { @@ -4598,7 +4598,7 @@ // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 @@ -4626,7 +4626,7 @@ // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly16x8x4_t [[TMP19]] poly16x8x4_t test_vld4q_lane_p16(poly16_t *a, poly16x8x4_t b) { @@ -4642,7 +4642,7 @@ // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 @@ -4670,7 +4670,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x4_t [[TMP19]] poly64x2x4_t test_vld4q_lane_p64(poly64_t *a, poly64x2x4_t b) { @@ -4686,7 +4686,7 @@ // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -4705,7 +4705,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] // CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 [[TMP9]], i64 32, i1 false) // CHECK: [[TMP10:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint8x8x4_t [[TMP10]] uint8x8x4_t test_vld4_lane_u8(uint8_t *a, uint8x8x4_t b) { @@ -4721,7 +4721,7 @@ // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 @@ -4749,7 +4749,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint16x4x4_t [[TMP19]] uint16x4x4_t test_vld4_lane_u16(uint16_t *a, uint16x4x4_t b) { @@ -4765,7 +4765,7 @@ // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 @@ -4793,7 +4793,7 @@ // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint32x2x4_t [[TMP19]] uint32x2x4_t test_vld4_lane_u32(uint32_t *a, uint32x2x4_t b) { @@ -4809,7 +4809,7 @@ // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 @@ -4837,7 +4837,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint64x1x4_t [[TMP19]] uint64x1x4_t test_vld4_lane_u64(uint64_t *a, uint64x1x4_t b) { @@ -4853,7 +4853,7 @@ // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -4872,7 +4872,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] // CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 [[TMP9]], i64 32, i1 false) // CHECK: [[TMP10:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int8x8x4_t [[TMP10]] int8x8x4_t test_vld4_lane_s8(int8_t *a, int8x8x4_t b) { @@ -4888,7 +4888,7 @@ // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 @@ -4916,7 +4916,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int16x4x4_t [[TMP19]] int16x4x4_t test_vld4_lane_s16(int16_t *a, int16x4x4_t b) { @@ -4932,7 +4932,7 @@ // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 @@ -4960,7 +4960,7 @@ // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int32x2x4_t [[TMP19]] int32x2x4_t test_vld4_lane_s32(int32_t *a, int32x2x4_t b) { @@ -4976,7 +4976,7 @@ // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 @@ -5004,7 +5004,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int64x1x4_t [[TMP19]] int64x1x4_t test_vld4_lane_s64(int64_t *a, int64x1x4_t b) { @@ -5020,7 +5020,7 @@ // CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 @@ -5048,7 +5048,7 @@ // CHECK: store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_LANE]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.float16x4x4_t [[TMP19]] float16x4x4_t test_vld4_lane_f16(float16_t *a, float16x4x4_t b) { @@ -5064,7 +5064,7 @@ // CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 @@ -5092,7 +5092,7 @@ // CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.float32x2x4_t [[TMP19]] float32x2x4_t test_vld4_lane_f32(float32_t *a, float32x2x4_t b) { @@ -5108,7 +5108,7 @@ // CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 @@ -5136,7 +5136,7 @@ // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4_LANE]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x4_t [[TMP19]] float64x1x4_t test_vld4_lane_f64(float64_t *a, float64x1x4_t b) { @@ -5152,7 +5152,7 @@ // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -5171,7 +5171,7 @@ // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] // CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 [[TMP9]], i64 32, i1 false) // CHECK: [[TMP10:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly8x8x4_t [[TMP10]] poly8x8x4_t test_vld4_lane_p8(poly8_t *a, poly8x8x4_t b) { @@ -5187,7 +5187,7 @@ // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 @@ -5215,7 +5215,7 @@ // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly16x4x4_t [[TMP19]] poly16x4x4_t test_vld4_lane_p16(poly16_t *a, poly16x4x4_t b) { @@ -5231,7 +5231,7 @@ // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 @@ -5259,7 +5259,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x4_t [[TMP19]] poly64x1x4_t test_vld4_lane_p64(poly64_t *a, poly64x1x4_t b) { @@ -5585,7 +5585,7 @@ // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -5605,7 +5605,7 @@ // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -5630,7 +5630,7 @@ // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -5655,7 +5655,7 @@ // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -5680,7 +5680,7 @@ // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -5700,7 +5700,7 @@ // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -5725,7 +5725,7 @@ // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -5750,7 +5750,7 @@ // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -5775,7 +5775,7 @@ // CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0 @@ -5800,7 +5800,7 @@ // CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0 @@ -5825,7 +5825,7 @@ // CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0 @@ -5850,7 +5850,7 @@ // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -5870,7 +5870,7 @@ // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -5895,7 +5895,7 @@ // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -5920,7 +5920,7 @@ // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -5940,7 +5940,7 @@ // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -5965,7 +5965,7 @@ // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -5990,7 +5990,7 @@ // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -6015,7 +6015,7 @@ // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -6035,7 +6035,7 @@ // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -6060,7 +6060,7 @@ // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -6085,7 +6085,7 @@ // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -6110,7 +6110,7 @@ // CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0 @@ -6135,7 +6135,7 @@ // CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0 @@ -6160,7 +6160,7 @@ // CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0 @@ -6185,7 +6185,7 @@ // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -6205,7 +6205,7 @@ // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -6230,7 +6230,7 @@ // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -6255,7 +6255,7 @@ // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -6278,7 +6278,7 @@ // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -6308,7 +6308,7 @@ // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -6338,7 +6338,7 @@ // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -6368,7 +6368,7 @@ // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -6391,7 +6391,7 @@ // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -6421,7 +6421,7 @@ // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -6451,7 +6451,7 @@ // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -6481,7 +6481,7 @@ // CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0 @@ -6511,7 +6511,7 @@ // CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0 @@ -6541,7 +6541,7 @@ // CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0 @@ -6571,7 +6571,7 @@ // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -6594,7 +6594,7 @@ // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -6624,7 +6624,7 @@ // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -6654,7 +6654,7 @@ // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -6677,7 +6677,7 @@ // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -6707,7 +6707,7 @@ // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -6737,7 +6737,7 @@ // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -6767,7 +6767,7 @@ // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -6790,7 +6790,7 @@ // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -6820,7 +6820,7 @@ // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -6850,7 +6850,7 @@ // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -6880,7 +6880,7 @@ // CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0 @@ -6910,7 +6910,7 @@ // CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0 @@ -6940,7 +6940,7 @@ // CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0 @@ -6970,7 +6970,7 @@ // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -6993,7 +6993,7 @@ // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -7023,7 +7023,7 @@ // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -7053,7 +7053,7 @@ // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -7079,7 +7079,7 @@ // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -7114,7 +7114,7 @@ // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -7149,7 +7149,7 @@ // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -7184,7 +7184,7 @@ // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -7210,7 +7210,7 @@ // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -7245,7 +7245,7 @@ // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -7280,7 +7280,7 @@ // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -7315,7 +7315,7 @@ // CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0 @@ -7350,7 +7350,7 @@ // CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0 @@ -7385,7 +7385,7 @@ // CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0 @@ -7420,7 +7420,7 @@ // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -7446,7 +7446,7 @@ // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -7481,7 +7481,7 @@ // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -7516,7 +7516,7 @@ // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -7542,7 +7542,7 @@ // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -7577,7 +7577,7 @@ // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -7612,7 +7612,7 @@ // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -7647,7 +7647,7 @@ // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -7673,7 +7673,7 @@ // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -7708,7 +7708,7 @@ // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -7743,7 +7743,7 @@ // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -7778,7 +7778,7 @@ // CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0 @@ -7813,7 +7813,7 @@ // CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0 @@ -7848,7 +7848,7 @@ // CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0 @@ -7883,7 +7883,7 @@ // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -7909,7 +7909,7 @@ // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -7944,7 +7944,7 @@ // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 Index: test/CodeGen/aarch64-neon-perm.c =================================================================== --- test/CodeGen/aarch64-neon-perm.c +++ test/CodeGen/aarch64-neon-perm.c @@ -899,7 +899,7 @@ // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0 @@ -925,7 +925,7 @@ // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0 @@ -951,7 +951,7 @@ // CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0 @@ -975,7 +975,7 @@ // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0 @@ -1001,7 +1001,7 @@ // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0 @@ -1027,7 +1027,7 @@ // CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0 @@ -1053,7 +1053,7 @@ // CHECK: store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0 @@ -1077,7 +1077,7 @@ // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0 @@ -1103,7 +1103,7 @@ // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0 @@ -1127,7 +1127,7 @@ // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0 @@ -1153,7 +1153,7 @@ // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0 @@ -1179,7 +1179,7 @@ // CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0 @@ -1203,7 +1203,7 @@ // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0 @@ -1229,7 +1229,7 @@ // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0 @@ -1255,7 +1255,7 @@ // CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0 @@ -1281,7 +1281,7 @@ // CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0 @@ -1305,7 +1305,7 @@ // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0 @@ -1331,7 +1331,7 @@ // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0 @@ -1355,7 +1355,7 @@ // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0 @@ -1381,7 +1381,7 @@ // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0 @@ -1407,7 +1407,7 @@ // CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0 @@ -1431,7 +1431,7 @@ // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0 @@ -1457,7 +1457,7 @@ // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0 @@ -1483,7 +1483,7 @@ // CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0 @@ -1509,7 +1509,7 @@ // CHECK: store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0 @@ -1533,7 +1533,7 @@ // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0 @@ -1559,7 +1559,7 @@ // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0 @@ -1583,7 +1583,7 @@ // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0 @@ -1609,7 +1609,7 @@ // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0 @@ -1635,7 +1635,7 @@ // CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0 @@ -1659,7 +1659,7 @@ // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0 @@ -1685,7 +1685,7 @@ // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0 @@ -1711,7 +1711,7 @@ // CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0 @@ -1737,7 +1737,7 @@ // CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0 @@ -1761,7 +1761,7 @@ // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0 @@ -1787,7 +1787,7 @@ // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0 @@ -1811,7 +1811,7 @@ // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0 @@ -1837,7 +1837,7 @@ // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0 @@ -1863,7 +1863,7 @@ // CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0 @@ -1887,7 +1887,7 @@ // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0 @@ -1913,7 +1913,7 @@ // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0 @@ -1939,7 +1939,7 @@ // CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0 @@ -1965,7 +1965,7 @@ // CHECK: store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0 @@ -1989,7 +1989,7 @@ // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0 @@ -2015,7 +2015,7 @@ // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0 @@ -2039,7 +2039,7 @@ // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0 @@ -2065,7 +2065,7 @@ // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0 @@ -2091,7 +2091,7 @@ // CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0 @@ -2115,7 +2115,7 @@ // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0 @@ -2141,7 +2141,7 @@ // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0 @@ -2167,7 +2167,7 @@ // CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0 @@ -2193,7 +2193,7 @@ // CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0 @@ -2217,7 +2217,7 @@ // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]] // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0 @@ -2243,7 +2243,7 @@ // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]] // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0 Index: test/CodeGen/aarch64-poly64.c =================================================================== --- test/CodeGen/aarch64-poly64.c +++ test/CodeGen/aarch64-poly64.c @@ -248,7 +248,7 @@ // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x2_t [[TMP6]] poly64x1x2_t test_vld2_p64(poly64_t const * ptr) { @@ -266,7 +266,7 @@ // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x2_t [[TMP6]] poly64x2x2_t test_vld2q_p64(poly64_t const * ptr) { @@ -284,7 +284,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x3_t [[TMP6]] poly64x1x3_t test_vld3_p64(poly64_t const * ptr) { @@ -302,7 +302,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x3_t [[TMP6]] poly64x2x3_t test_vld3q_p64(poly64_t const * ptr) { @@ -320,7 +320,7 @@ // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x4_t [[TMP6]] poly64x1x4_t test_vld4_p64(poly64_t const * ptr) { @@ -338,7 +338,7 @@ // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x4_t [[TMP6]] poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) { @@ -352,7 +352,7 @@ // CHECK: store [2 x <1 x i64>] [[VAL]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[VAL]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8* // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 0 @@ -377,7 +377,7 @@ // CHECK: store [2 x <2 x i64>] [[VAL]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[VAL]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8* // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 0 @@ -402,7 +402,7 @@ // CHECK: store [3 x <1 x i64>] [[VAL]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[VAL]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8* // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 0 @@ -432,7 +432,7 @@ // CHECK: store [3 x <2 x i64>] [[VAL]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[VAL]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8* // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 0 @@ -462,7 +462,7 @@ // CHECK: store [4 x <1 x i64>] [[VAL]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[VAL]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8* // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 0 @@ -497,7 +497,7 @@ // CHECK: store [4 x <2 x i64>] [[VAL]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[VAL]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8* // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 0 Index: test/CodeGen/aarch64-v8.2a-neon-intrinsics.c =================================================================== --- test/CodeGen/aarch64-v8.2a-neon-intrinsics.c +++ test/CodeGen/aarch64-v8.2a-neon-intrinsics.c @@ -1359,7 +1359,7 @@ // CHECK: store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]] // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) { return vzip_f16(a, b); } @@ -1376,7 +1376,7 @@ // CHECK: store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]] // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) { return vzipq_f16(a, b); } @@ -1393,7 +1393,7 @@ // CHECK: store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]] // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) { return vuzp_f16(a, b); } @@ -1410,7 +1410,7 @@ // CHECK: store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]] // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) { return vuzpq_f16(a, b); } @@ -1427,7 +1427,7 @@ // CHECK: store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]] // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) { return vtrn_f16(a, b); } @@ -1444,7 +1444,7 @@ // CHECK: store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]] // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) { return vtrnq_f16(a, b); } Index: test/CodeGen/arm-arguments.c =================================================================== --- test/CodeGen/arm-arguments.c +++ test/CodeGen/arm-arguments.c @@ -208,13 +208,13 @@ // APCS-GNU: %[[a:.*]] = alloca %struct.s35, align 16 // APCS-GNU: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8* // APCS-GNU: %[[c:.*]] = bitcast %struct.s35* %0 to i8* -// APCS-GNU: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]] +// APCS-GNU: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align {{[0-9]+}} %[[b]], i8* align {{[0-9]+}} %[[c]] // APCS-GNU: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>* // APCS-GNU: load <4 x float>, <4 x float>* %[[d]], align 16 // AAPCS-LABEL: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval align 8, %struct.s35* byval align 8) // AAPCS: %[[a:.*]] = alloca %struct.s35, align 16 // AAPCS: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8* // AAPCS: %[[c:.*]] = bitcast %struct.s35* %0 to i8* -// AAPCS: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]] +// AAPCS: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %[[b]], i8* align 8 %[[c]] // AAPCS: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>* // AAPCS: load <4 x float>, <4 x float>* %[[d]], align 16 Index: test/CodeGen/arm64-be-bitfield.c =================================================================== --- test/CodeGen/arm64-be-bitfield.c +++ test/CodeGen/arm64-be-bitfield.c @@ -7,6 +7,6 @@ // IR: callee_b0f(i64 [[ARG:%.*]]) // IR: store i64 [[ARG]], i64* [[PTR:%.*]], align 8 // IR: [[BITCAST:%.*]] = bitcast i64* [[PTR]] to i8* -// IR: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* [[BITCAST]], i64 4 +// IR: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* align 8 [[BITCAST]], i64 4 return bp11.b2; } Index: test/CodeGen/arm_neon_intrinsics.c =================================================================== --- test/CodeGen/arm_neon_intrinsics.c +++ test/CodeGen/arm_neon_intrinsics.c @@ -4846,7 +4846,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 @@ -4873,7 +4873,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 @@ -4900,7 +4900,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 @@ -4927,7 +4927,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 @@ -4954,7 +4954,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 @@ -4981,7 +4981,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 @@ -5008,7 +5008,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 @@ -5035,7 +5035,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0 @@ -5057,7 +5057,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 @@ -5084,7 +5084,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 @@ -5111,7 +5111,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0 @@ -5133,7 +5133,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 @@ -5160,7 +5160,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 @@ -5187,7 +5187,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 @@ -5214,7 +5214,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 @@ -5241,7 +5241,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0 @@ -5263,7 +5263,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 @@ -5587,7 +5587,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 @@ -5619,7 +5619,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 @@ -5651,7 +5651,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 @@ -5683,7 +5683,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 @@ -5715,7 +5715,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 @@ -5747,7 +5747,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 @@ -5779,7 +5779,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 @@ -5811,7 +5811,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 @@ -5836,7 +5836,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 @@ -5868,7 +5868,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 @@ -5900,7 +5900,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 @@ -5925,7 +5925,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 @@ -5957,7 +5957,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 @@ -5989,7 +5989,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 @@ -6021,7 +6021,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 @@ -6053,7 +6053,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 @@ -6078,7 +6078,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 @@ -6407,7 +6407,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 @@ -6444,7 +6444,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 @@ -6481,7 +6481,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 @@ -6518,7 +6518,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 @@ -6555,7 +6555,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 @@ -6592,7 +6592,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 @@ -6629,7 +6629,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 @@ -6666,7 +6666,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0 @@ -6694,7 +6694,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 @@ -6731,7 +6731,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 @@ -6768,7 +6768,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0 @@ -6796,7 +6796,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 @@ -6833,7 +6833,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 @@ -6870,7 +6870,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 @@ -6907,7 +6907,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 @@ -6944,7 +6944,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0 @@ -6972,7 +6972,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 @@ -16199,7 +16199,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -16220,7 +16220,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -16246,7 +16246,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0 @@ -16272,7 +16272,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -16293,7 +16293,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -16319,7 +16319,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0 @@ -16345,7 +16345,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0 @@ -16371,7 +16371,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0 @@ -16397,7 +16397,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -16418,7 +16418,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -16444,7 +16444,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -16465,7 +16465,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -16491,7 +16491,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0 @@ -16517,7 +16517,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0 @@ -16543,7 +16543,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -16564,7 +16564,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -16590,7 +16590,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0 @@ -16616,7 +16616,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0 @@ -16642,7 +16642,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0 @@ -16668,7 +16668,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0 @@ -16694,7 +16694,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -16715,7 +16715,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -16741,7 +16741,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -16767,7 +16767,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0 @@ -16793,7 +16793,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -16819,7 +16819,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0 @@ -16845,7 +16845,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0 @@ -16871,7 +16871,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0 @@ -16897,7 +16897,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -16923,7 +16923,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -16944,7 +16944,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -16970,7 +16970,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0 @@ -16996,7 +16996,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -17017,7 +17017,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -17043,7 +17043,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0 @@ -17069,7 +17069,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0 @@ -17095,7 +17095,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0 @@ -17121,7 +17121,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -17142,7 +17142,7 @@ // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -17168,7 +17168,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -17192,7 +17192,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -17223,7 +17223,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0 @@ -17254,7 +17254,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -17278,7 +17278,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -17309,7 +17309,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0 @@ -17340,7 +17340,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0 @@ -17371,7 +17371,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0 @@ -17402,7 +17402,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -17426,7 +17426,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -17457,7 +17457,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -17481,7 +17481,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -17512,7 +17512,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0 @@ -17543,7 +17543,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0 @@ -17574,7 +17574,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -17598,7 +17598,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -17629,7 +17629,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0 @@ -17660,7 +17660,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0 @@ -17691,7 +17691,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0 @@ -17722,7 +17722,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0 @@ -17753,7 +17753,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -17777,7 +17777,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -17808,7 +17808,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -17839,7 +17839,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0 @@ -17870,7 +17870,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -17901,7 +17901,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0 @@ -17932,7 +17932,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0 @@ -17963,7 +17963,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0 @@ -17994,7 +17994,7 @@ // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -18025,7 +18025,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -18049,7 +18049,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -18080,7 +18080,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0 @@ -18111,7 +18111,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -18135,7 +18135,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -18166,7 +18166,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0 @@ -18197,7 +18197,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0 @@ -18228,7 +18228,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0 @@ -18259,7 +18259,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -18283,7 +18283,7 @@ // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -18314,7 +18314,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -18341,7 +18341,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -18377,7 +18377,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0 @@ -18413,7 +18413,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -18440,7 +18440,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -18476,7 +18476,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0 @@ -18512,7 +18512,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0 @@ -18548,7 +18548,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0 @@ -18584,7 +18584,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -18611,7 +18611,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -18647,7 +18647,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -18674,7 +18674,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -18710,7 +18710,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0 @@ -18746,7 +18746,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0 @@ -18782,7 +18782,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -18809,7 +18809,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -18845,7 +18845,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0 @@ -18881,7 +18881,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0 @@ -18917,7 +18917,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0 @@ -18953,7 +18953,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0 @@ -18989,7 +18989,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -19016,7 +19016,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -19052,7 +19052,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -19088,7 +19088,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0 @@ -19124,7 +19124,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -19160,7 +19160,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0 @@ -19196,7 +19196,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0 @@ -19232,7 +19232,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0 @@ -19268,7 +19268,7 @@ // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 @@ -19304,7 +19304,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -19331,7 +19331,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -19367,7 +19367,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0 @@ -19403,7 +19403,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -19430,7 +19430,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -19466,7 +19466,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0 @@ -19502,7 +19502,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0 @@ -19538,7 +19538,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0 @@ -19574,7 +19574,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -19601,7 +19601,7 @@ // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false) // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0 @@ -20474,7 +20474,7 @@ // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !noalias !3 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) // CHECK: ret void int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) { return vtrn_s8(a, b); @@ -20493,7 +20493,7 @@ // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !noalias !6 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) { return vtrn_s16(a, b); @@ -20512,7 +20512,7 @@ // CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], !noalias !9 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) { return vtrn_s32(a, b); @@ -20529,7 +20529,7 @@ // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !noalias !12 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) // CHECK: ret void uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) { return vtrn_u8(a, b); @@ -20548,7 +20548,7 @@ // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !noalias !15 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) { return vtrn_u16(a, b); @@ -20567,7 +20567,7 @@ // CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], !noalias !18 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) { return vtrn_u32(a, b); @@ -20586,7 +20586,7 @@ // CHECK: store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP4]], !noalias !21 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) { return vtrn_f32(a, b); @@ -20603,7 +20603,7 @@ // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !noalias !24 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) // CHECK: ret void poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) { return vtrn_p8(a, b); @@ -20622,7 +20622,7 @@ // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !noalias !27 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) { return vtrn_p16(a, b); @@ -20639,7 +20639,7 @@ // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !30 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) // CHECK: ret void int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) { return vtrnq_s8(a, b); @@ -20658,7 +20658,7 @@ // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !33 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) { return vtrnq_s16(a, b); @@ -20677,7 +20677,7 @@ // CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], !noalias !36 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) { return vtrnq_s32(a, b); @@ -20694,7 +20694,7 @@ // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !39 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) // CHECK: ret void uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) { return vtrnq_u8(a, b); @@ -20713,7 +20713,7 @@ // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !42 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { return vtrnq_u16(a, b); @@ -20732,7 +20732,7 @@ // CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], !noalias !45 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) { return vtrnq_u32(a, b); @@ -20751,7 +20751,7 @@ // CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]], !noalias !48 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { return vtrnq_f32(a, b); @@ -20768,7 +20768,7 @@ // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !51 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) // CHECK: ret void poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) { return vtrnq_p8(a, b); @@ -20787,7 +20787,7 @@ // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !54 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) { return vtrnq_p16(a, b); @@ -20968,7 +20968,7 @@ // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !noalias !57 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) // CHECK: ret void int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) { return vuzp_s8(a, b); @@ -20987,7 +20987,7 @@ // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !noalias !60 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) { return vuzp_s16(a, b); @@ -21006,7 +21006,7 @@ // CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], !noalias !63 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) { return vuzp_s32(a, b); @@ -21023,7 +21023,7 @@ // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !noalias !66 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) // CHECK: ret void uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) { return vuzp_u8(a, b); @@ -21042,7 +21042,7 @@ // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !noalias !69 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) { return vuzp_u16(a, b); @@ -21061,7 +21061,7 @@ // CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], !noalias !72 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) { return vuzp_u32(a, b); @@ -21080,7 +21080,7 @@ // CHECK: store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP4]], !noalias !75 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) { return vuzp_f32(a, b); @@ -21097,7 +21097,7 @@ // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !noalias !78 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) // CHECK: ret void poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) { return vuzp_p8(a, b); @@ -21116,7 +21116,7 @@ // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !noalias !81 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) { return vuzp_p16(a, b); @@ -21133,7 +21133,7 @@ // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !84 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) // CHECK: ret void int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) { return vuzpq_s8(a, b); @@ -21152,7 +21152,7 @@ // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !87 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) { return vuzpq_s16(a, b); @@ -21171,7 +21171,7 @@ // CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], !noalias !90 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) { return vuzpq_s32(a, b); @@ -21188,7 +21188,7 @@ // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !93 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) // CHECK: ret void uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) { return vuzpq_u8(a, b); @@ -21207,7 +21207,7 @@ // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !96 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) { return vuzpq_u16(a, b); @@ -21226,7 +21226,7 @@ // CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], !noalias !99 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) { return vuzpq_u32(a, b); @@ -21245,7 +21245,7 @@ // CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]], !noalias !102 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) { return vuzpq_f32(a, b); @@ -21262,7 +21262,7 @@ // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !105 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) // CHECK: ret void poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) { return vuzpq_p8(a, b); @@ -21281,7 +21281,7 @@ // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !108 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) { return vuzpq_p16(a, b); @@ -21298,7 +21298,7 @@ // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !noalias !111 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) // CHECK: ret void int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) { return vzip_s8(a, b); @@ -21317,7 +21317,7 @@ // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !noalias !114 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) { return vzip_s16(a, b); @@ -21336,7 +21336,7 @@ // CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], !noalias !117 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) { return vzip_s32(a, b); @@ -21353,7 +21353,7 @@ // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !noalias !120 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) // CHECK: ret void uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) { return vzip_u8(a, b); @@ -21372,7 +21372,7 @@ // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !noalias !123 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) { return vzip_u16(a, b); @@ -21391,7 +21391,7 @@ // CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], !noalias !126 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) { return vzip_u32(a, b); @@ -21410,7 +21410,7 @@ // CHECK: store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP4]], !noalias !129 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) { return vzip_f32(a, b); @@ -21427,7 +21427,7 @@ // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !noalias !132 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) // CHECK: ret void poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) { return vzip_p8(a, b); @@ -21446,7 +21446,7 @@ // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !noalias !135 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) // CHECK: ret void poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) { return vzip_p16(a, b); @@ -21463,7 +21463,7 @@ // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !138 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) // CHECK: ret void int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) { return vzipq_s8(a, b); @@ -21482,7 +21482,7 @@ // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !141 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) { return vzipq_s16(a, b); @@ -21501,7 +21501,7 @@ // CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], !noalias !144 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) { return vzipq_s32(a, b); @@ -21518,7 +21518,7 @@ // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !147 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) // CHECK: ret void uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) { return vzipq_u8(a, b); @@ -21537,7 +21537,7 @@ // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !150 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) { return vzipq_u16(a, b); @@ -21556,7 +21556,7 @@ // CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], !noalias !153 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) { return vzipq_u32(a, b); @@ -21575,7 +21575,7 @@ // CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]], !noalias !156 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) { return vzipq_f32(a, b); @@ -21592,7 +21592,7 @@ // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !159 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) // CHECK: ret void poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) { return vzipq_p8(a, b); @@ -21611,7 +21611,7 @@ // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !162 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) // CHECK: ret void poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) { return vzipq_p16(a, b); Index: test/CodeGen/atomic-arm64.c =================================================================== --- test/CodeGen/atomic-arm64.c +++ test/CodeGen/atomic-arm64.c @@ -65,7 +65,7 @@ // CHECK: [[TEMP:%.*]] = alloca [[QUAD_T:%.*]], align 8 // CHECK-NEXT: [[T0:%.*]] = bitcast [[QUAD_T]]* [[TEMP]] to i8* // CHECK-NEXT: [[T1:%.*]] = bitcast [[QUAD_T]]* {{%.*}} to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 32, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[T0]], i8* align 8 [[T1]], i64 32, i1 false) // CHECK-NEXT: [[T0:%.*]] = bitcast [[QUAD_T]]* [[TEMP]] to i256* // CHECK-NEXT: [[T1:%.*]] = bitcast i256* [[T0]] to i8* // CHECK-NEXT: call void @__atomic_store(i64 32, i8* bitcast ([[QUAD_T]]* @a_pointer_quad to i8*), i8* [[T1]], i32 5) Index: test/CodeGen/block-byref-aggr.c =================================================================== --- test/CodeGen/block-byref-aggr.c +++ test/CodeGen/block-byref-aggr.c @@ -24,7 +24,7 @@ // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[BYREF]], [[BYREF]]* [[T0]], i32 0, i32 4 // CHECK-NEXT: [[T2:%.*]] = bitcast [[AGG]]* [[T1]] to i8* // CHECK-NEXT: [[T3:%.*]] = bitcast [[AGG]]* [[TEMP]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T2]], i8* [[T3]], i64 4, i32 4, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[T2]], i8* align 4 [[T3]], i64 4, i1 false) // Verify that there's nothing else significant in the function. // CHECK-NEXT: [[T0:%.*]] = bitcast [[BYREF]]* [[A]] to i8* // CHECK-NEXT: call void @_Block_object_dispose(i8* [[T0]], i32 8) @@ -50,14 +50,14 @@ // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[B_BYREF]], [[B_BYREF]]* [[T0]], i32 0, i32 4 // CHECK-NEXT: [[T2:%.*]] = bitcast [[AGG]]* [[T1]] to i8* // CHECK-NEXT: [[T3:%.*]] = bitcast [[AGG]]* [[TEMP]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T2]], i8* [[T3]], i64 4, i32 4, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[T2]], i8* align 4 [[T3]], i64 4, i1 false) // Then for 'a': // CHECK-NEXT: [[A_FORWARDING:%.*]] = getelementptr inbounds [[A_BYREF]], [[A_BYREF]]* [[A]], i32 0, i32 1 // CHECK-NEXT: [[T0:%.*]] = load [[A_BYREF]]*, [[A_BYREF]]** [[A_FORWARDING]] // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[A_BYREF]], [[A_BYREF]]* [[T0]], i32 0, i32 4 // CHECK-NEXT: [[T2:%.*]] = bitcast [[AGG]]* [[T1]] to i8* // CHECK-NEXT: [[T3:%.*]] = bitcast [[AGG]]* [[TEMP]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T2]], i8* [[T3]], i64 4, i32 4, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[T2]], i8* align 4 [[T3]], i64 4, i1 false) // Verify that there's nothing else significant in the function. // CHECK-NEXT: [[T0:%.*]] = bitcast [[B_BYREF]]* [[B]] to i8* // CHECK-NEXT: call void @_Block_object_dispose(i8* [[T0]], i32 8) Index: test/CodeGen/builtin-memfns.c =================================================================== --- test/CodeGen/builtin-memfns.c +++ test/CodeGen/builtin-memfns.c @@ -54,14 +54,14 @@ int test7(int *p) { struct snd_pcm_hw_params_t* hwparams; // incomplete type. - // CHECK: call void @llvm.memset{{.*}}256, i32 4, i1 false) + // CHECK: call void @llvm.memset{{.*}} align 4 {{.*}}256, i1 false) __builtin_memset(p, 0, 256); // Should be alignment = 4 - // CHECK: call void @llvm.memset{{.*}}256, i32 1, i1 false) + // CHECK: call void @llvm.memset{{.*}} align 1 {{.*}}256, i1 false) __builtin_memset((char*)p, 0, 256); // Should be alignment = 1 __builtin_memset(hwparams, 0, 256); // No crash alignment = 1 - // CHECK: call void @llvm.memset{{.*}}256, i32 1, i1 false) + // CHECK: call void @llvm.memset{{.*}} align 1{{.*}}256, i1 false) } // @@ -73,13 +73,13 @@ struct PS ps; void test8(int *arg) { // CHECK: @test8 - // CHECK: call void @llvm.memcpy{{.*}} 16, i32 1, i1 false) + // CHECK: call void @llvm.memcpy{{.*}} align 4 {{.*}} align 1 {{.*}} 16, i1 false) __builtin_memcpy(arg, ps.modes, sizeof(struct PS)); } __attribute((aligned(16))) int x[4], y[4]; void test9() { // CHECK: @test9 - // CHECK: call void @llvm.memcpy{{.*}} 16, i32 16, i1 false) + // CHECK: call void @llvm.memcpy{{.*}} align 16 {{.*}} align 16 {{.*}} 16, i1 false) __builtin_memcpy(x, y, sizeof(y)); } Index: test/CodeGen/c11atomics-ios.c =================================================================== --- test/CodeGen/c11atomics-ios.c +++ test/CodeGen/c11atomics-ios.c @@ -132,7 +132,7 @@ // CHECK-NEXT: [[T0:%.*]] = load [[S]]*, [[S]]** [[FP]] // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8* // CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[T1]], i8* align 2 [[T2]], i32 8, i1 false) // CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[TMP0]] to i64* // CHECK-NEXT: [[T4:%.*]] = load i64, i64* [[T3]], align 8 // CHECK-NEXT: [[T5:%.*]] = bitcast [[S]]* [[T0]] to i64* @@ -154,7 +154,7 @@ // CHECK-NEXT: [[P:%.*]] = load [[APS]]*, [[APS]]** [[FP]] // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8* -// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[T0]], i8 0, i64 8, i1 false) // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[P]], i32 0, i32 0 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0 // CHECK-NEXT: store i16 1, i16* [[T1]], align 8 @@ -165,7 +165,7 @@ __c11_atomic_init(fp, (PS){1,2,3}); // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8* -// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 8 [[T0]], i8 0, i32 8, i1 false) // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[X]], i32 0, i32 0 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0 // CHECK-NEXT: store i16 1, i16* [[T1]], align 8 @@ -183,16 +183,16 @@ // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP0]], i32 0, i32 0 // CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8* // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[T1]], i8* align 8 [[T2]], i32 6, i1 false) PS f = *fp; // CHECK-NEXT: [[T0:%.*]] = load [[APS]]*, [[APS]]** [[FP]] // CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8* -// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 8 [[T1]], i8 0, i32 8, i1 false) // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP1]], i32 0, i32 0 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8* // CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[T2]], i8* align 2 [[T3]], i32 6, i1 false) // CHECK-NEXT: [[T4:%.*]] = bitcast [[APS]]* [[TMP1]] to i64* // CHECK-NEXT: [[T5:%.*]] = load i64, i64* [[T4]], align 8 // CHECK-NEXT: [[T6:%.*]] = bitcast [[APS]]* [[T0]] to i64* @@ -215,7 +215,7 @@ // CHECK: [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS* // CHECK: [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8* // CHECK: [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[AGG_RESULT8]], i8* align 8 [[ATOMIC_RES8]], i32 6, i1 false) return __c11_atomic_load(addr, 5); } @@ -232,11 +232,11 @@ // CHECK: [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4 // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8* // CHECK: [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[NONATOMIC_TMP8]], i8* align 2 [[VAL8]], i32 6, i1 false) // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64* // CHECK: [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8* // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[ATOMIC_VAL8]], i8* align 2 [[NONATOMIC_TMP8]], i64 6, i1 false) // CHECK: [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64* // CHECK: [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 8 // CHECK: store atomic i64 [[VAL64]], i64* [[ADDR64]] seq_cst, align 8 @@ -257,11 +257,11 @@ // CHECK: [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4 // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8* // CHECK: [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[NONATOMIC_TMP8]], i8* align 2 [[VAL8]], i32 6, i1 false) // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64* // CHECK: [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8* // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[ATOMIC_VAL8]], i8* align 2 [[NONATOMIC_TMP8]], i64 6, i1 false) // CHECK: [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64* // CHECK: [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64* // CHECK: [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 8 @@ -270,7 +270,7 @@ // CHECK: [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS* // CHECK: [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8* // CHECK: [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[AGG_RESULT8]], i8* align 8 [[ATOMIC_RES8]], i32 6, i1 false) return __c11_atomic_exchange(addr, *val, 5); } @@ -291,15 +291,15 @@ // CHECK: [[NEW:%.*]] = load %struct.PS*, %struct.PS** [[NEW_ARG]], align 4 // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8* // CHECK: [[NEW8:%.*]] = bitcast %struct.PS* [[NEW]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[NEW8]], i32 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[NONATOMIC_TMP8]], i8* align 2 [[NEW8]], i32 6, i1 false) // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64* // CHECK: [[ATOMIC_DESIRED8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED:%.*]] to i8* // CHECK: [[DESIRED8:%.*]] = bitcast %struct.PS* [[DESIRED]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_DESIRED8]], i8* [[DESIRED8]], i64 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[ATOMIC_DESIRED8]], i8* align 2 [[DESIRED8]], i64 6, i1 false) // CHECK: [[ATOMIC_DESIRED64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED:%.*]] to i64* // CHECK: [[ATOMIC_NEW8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i8* // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_NEW8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[ATOMIC_NEW8]], i8* align 2 [[NONATOMIC_TMP8]], i64 6, i1 false) // CHECK: [[ATOMIC_NEW64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i64* // CHECK: [[ATOMIC_DESIRED_VAL64:%.*]] = load i64, i64* [[ATOMIC_DESIRED64]], align 8 // CHECK: [[ATOMIC_NEW_VAL64:%.*]] = load i64, i64* [[ATOMIC_NEW64]], align 8 Index: test/CodeGen/c11atomics.c =================================================================== --- test/CodeGen/c11atomics.c +++ test/CodeGen/c11atomics.c @@ -282,7 +282,7 @@ // CHECK-NEXT: [[T0:%.*]] = load [[S]]*, [[S]]** [[FP]] // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8* // CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[T1]], i8* align 2 [[T2]], i32 8, i1 false) // CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[T0]] to i8* // CHECK-NEXT: [[T4:%.*]] = bitcast [[S]]* [[TMP0]] to i8* // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T3]], i8* [[T4]], i32 5) @@ -307,7 +307,7 @@ // CHECK-NEXT: [[P:%.*]] = load [[APS]]*, [[APS]]** [[FP]] // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8* -// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[T0]], i8 0, i64 8, i1 false) // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[P]], i32 0, i32 0 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0 // CHECK-NEXT: store i16 1, i16* [[T1]], align 8 @@ -318,7 +318,7 @@ __c11_atomic_init(fp, (PS){1,2,3}); // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8* -// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 8 [[T0]], i8 0, i32 8, i1 false) // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[X]], i32 0, i32 0 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0 // CHECK-NEXT: store i16 1, i16* [[T1]], align 8 @@ -335,16 +335,16 @@ // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP0]], i32 0, i32 0 // CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8* // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[T1]], i8* align 8 [[T2]], i32 6, i1 false) PS f = *fp; // CHECK-NEXT: [[T0:%.*]] = load [[APS]]*, [[APS]]** [[FP]] // CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8* -// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 8 [[T1]], i8 0, i32 8, i1 false) // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP1]], i32 0, i32 0 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8* // CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[T2]], i8* align 2 [[T3]], i32 6, i1 false) // CHECK-NEXT: [[T4:%.*]] = bitcast [[APS]]* [[T0]] to i8* // CHECK-NEXT: [[T5:%.*]] = bitcast [[APS]]* [[TMP1]] to i8* // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T4]], i8* [[T5]], i32 5) @@ -357,7 +357,7 @@ // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP3]], i32 0, i32 0 // CHECK-NEXT: [[T1:%.*]] = bitcast %struct.PS* [[TMP2]] to i8* // CHECK-NEXT: [[T2:%.*]] = bitcast %struct.PS* [[T0]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[T1]], i8* align 8 [[T2]], i32 6, i1 false) // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds %struct.PS, %struct.PS* [[TMP2]], i32 0, i32 0 // CHECK-NEXT: [[T1:%.*]] = load i16, i16* [[T0]], align 2 // CHECK-NEXT: [[T2:%.*]] = sext i16 [[T1]] to i32 @@ -381,7 +381,7 @@ // CHECK: [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS* // CHECK: [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8* // CHECK: [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[AGG_RESULT8]], i8* align 8 [[ATOMIC_RES8]], i32 6, i1 false) return __c11_atomic_load(addr, 5); } @@ -398,11 +398,11 @@ // CHECK: [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4 // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8* // CHECK: [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[NONATOMIC_TMP8]], i8* align 2 [[VAL8]], i32 6, i1 false) // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64* // CHECK: [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8* // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[ATOMIC_VAL8]], i8* align 2 [[NONATOMIC_TMP8]], i64 6, i1 false) // CHECK: [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64* // CHECK: [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8* // CHECK: [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 2 @@ -423,11 +423,11 @@ // CHECK: [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4 // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8* // CHECK: [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[NONATOMIC_TMP8]], i8* align 2 [[VAL8]], i32 6, i1 false) // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64* // CHECK: [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8* // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[ATOMIC_VAL8]], i8* align 2 [[NONATOMIC_TMP8]], i64 6, i1 false) // CHECK: [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64* // CHECK: [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64* // CHECK: [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8* @@ -437,7 +437,7 @@ // CHECK: [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS* // CHECK: [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8* // CHECK: [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[AGG_RESULT8]], i8* align 8 [[ATOMIC_RES8]], i32 6, i1 false) return __c11_atomic_exchange(addr, *val, 5); } @@ -457,15 +457,15 @@ // CHECK: [[NEW:%.*]] = load %struct.PS*, %struct.PS** [[NEW_ARG]], align 4 // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8* // CHECK: [[NEW8:%.*]] = bitcast %struct.PS* [[NEW]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[NEW8]], i32 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[NONATOMIC_TMP8]], i8* align 2 [[NEW8]], i32 6, i1 false) // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64* // CHECK: [[ATOMIC_DESIRED8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i8* // CHECK: [[DESIRED8:%.*]] = bitcast %struct.PS* [[DESIRED]]to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_DESIRED8]], i8* [[DESIRED8]], i64 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[ATOMIC_DESIRED8]], i8* align 2 [[DESIRED8]], i64 6, i1 false) // CHECK: [[ATOMIC_DESIRED64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i64* // CHECK: [[ATOMIC_NEW8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i8* // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_NEW8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[ATOMIC_NEW8]], i8* align 2 [[NONATOMIC_TMP8]], i64 6, i1 false) // CHECK: [[ATOMIC_NEW64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i64* // CHECK: [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8* // CHECK: [[ATOMIC_DESIRED8:%.*]] = bitcast i64* [[ATOMIC_DESIRED64]] to i8* Index: test/CodeGen/compound-literal.c =================================================================== --- test/CodeGen/compound-literal.c +++ test/CodeGen/compound-literal.c @@ -33,7 +33,7 @@ // CHECK-NEXT: store i32 [[TMP]], i32* [[CY]] // CHECK-NEXT: [[SI8:%[a-zA-Z0-9.]+]] = bitcast [[STRUCT]]* [[S]] to i8* // CHECK-NEXT: [[COMPOUNDLITI8:%[a-zA-Z0-9.]+]] = bitcast [[STRUCT]]* [[COMPOUNDLIT]] to i8* - // CHECK-NEXT: call void @llvm.memcpy{{.*}}(i8* [[SI8]], i8* [[COMPOUNDLITI8]] + // CHECK-NEXT: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} [[SI8]], i8* align {{[0-9]+}} [[COMPOUNDLITI8]] s = (S){s.y,s.x}; // CHECK-NEXT: ret void } @@ -67,7 +67,7 @@ // CHECK-NEXT: [[T0:%.*]] = bitcast i48* [[COERCE_TEMP]] to i8* // CHECK-NEXT: [[T1:%.*]] = bitcast [[G]]* [[RESULT]] to i8* - // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 6 + // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} [[T0]], i8* align {{[0-9]+}} [[T1]], i64 6 // CHECK-NEXT: [[T0:%.*]] = load i48, i48* [[COERCE_TEMP]] // CHECK-NEXT: ret i48 [[T0]] } Index: test/CodeGen/le32-vaarg.c =================================================================== --- test/CodeGen/le32-vaarg.c +++ test/CodeGen/le32-vaarg.c @@ -23,7 +23,7 @@ // CHECK: [[RESULT:%[a-z_0-9]+]] = va_arg {{.*}}, %struct.Foo{{$}} // CHECK: store %struct.Foo [[RESULT]], %struct.Foo* [[LOC:%[a-z_0-9]+]] // CHECK: [[LOC2:%[a-z_0-9]+]] = bitcast {{.*}} [[LOC]] to i8* -// CHECK: call void @llvm.memcpy{{.*}}@dest{{.*}}, i8* [[LOC2]] +// CHECK: call void @llvm.memcpy{{.*}}@dest{{.*}}, i8* align {{[0-9]+}} [[LOC2]] void skip_struct(va_list *args) { va_arg(*args, struct Foo); Index: test/CodeGen/ms-intrinsics.c =================================================================== --- test/CodeGen/ms-intrinsics.c +++ test/CodeGen/ms-intrinsics.c @@ -20,12 +20,12 @@ } // CHECK-I386: define{{.*}}void @test__stosb -// CHECK-I386: tail call void @llvm.memset.p0i8.i32(i8* %Dest, i8 %Data, i32 %Count, i32 1, i1 true) +// CHECK-I386: tail call void @llvm.memset.p0i8.i32(i8* align 1 %Dest, i8 %Data, i32 %Count, i1 true) // CHECK-I386: ret void // CHECK-I386: } // CHECK-X64: define{{.*}}void @test__stosb -// CHECK-X64: tail call void @llvm.memset.p0i8.i64(i8* %Dest, i8 %Data, i64 %Count, i32 1, i1 true) +// CHECK-X64: tail call void @llvm.memset.p0i8.i64(i8* align 1 %Dest, i8 %Data, i64 %Count, i1 true) // CHECK-X64: ret void // CHECK-X64: } Index: test/CodeGen/no-opt-volatile-memcpy.c =================================================================== --- test/CodeGen/no-opt-volatile-memcpy.c +++ test/CodeGen/no-opt-volatile-memcpy.c @@ -18,10 +18,10 @@ // CHECK: %[[LS:.*]] = alloca %struct.s, align 4 // CHECK-NEXT: %[[ZERO:.*]] = bitcast %struct.s* %[[LS]] to i8* // CHECK-NEXT: %[[ONE:.*]] = bitcast %struct.s* %[[LS]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* %[[ZERO]], i8* %[[ONE]], i64 132, i32 4, i1 true) -// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true) +// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* align 4 %[[ZERO]], i8* align 4 %[[ONE]], i64 132, i1 true) +// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i1 true) // CHECK-NEXT: %[[TWO:.*]] = bitcast %struct.s* %[[LS]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* %[[TWO]], i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true) +// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* align 4 %[[TWO]], i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i1 true) struct s1 { @@ -35,6 +35,6 @@ s.y = gs; } // CHECK-LABEL: define void @fee() -// CHECK: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true) -// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true) +// CHECK: call void @llvm.memcpy.{{.*}}(i8* align 4 getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* align 4 getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i64 132, i1 true) +// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* align 4 getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i1 true) Index: test/CodeGen/packed-nest-unpacked.c =================================================================== --- test/CodeGen/packed-nest-unpacked.c +++ test/CodeGen/packed-nest-unpacked.c @@ -9,26 +9,26 @@ // struct X test1() { // CHECK: @test1 - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i32 1, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i1 false) return g.y; } struct X test2() { // CHECK: @test2 - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i32 1, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i1 false) struct X a = g.y; return a; } void test3(struct X a) { // CHECK: @test3 - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i8* {{.*}}, i64 24, i32 1, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i8* {{.*}}, i64 24, i1 false) g.y = a; } // void test4() { // CHECK: @test4 - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i32 1, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i1 false) f(g.y); } @@ -42,7 +42,7 @@ // void test6() { // CHECK: @test6 - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i8* %{{.*}}, i64 24, i32 1, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i8* align 4 %{{.*}}, i64 24, i1 false) g.y = foo(); } Index: test/CodeGen/packed-structure.c =================================================================== --- test/CodeGen/packed-structure.c +++ test/CodeGen/packed-structure.c @@ -29,7 +29,7 @@ // CHECK-FUNCTIONS: ret i32 [[s0_load_y]] int s0_load_y(struct s0 *a) { return a->y; } // CHECK-FUNCTIONS-LABEL: define void @s0_copy -// CHECK-FUNCTIONS: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i32 4, i1 false) +// CHECK-FUNCTIONS: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 {{.*}}, i8* align 4 {{.*}}, i64 8, i1 false) void s0_copy(struct s0 *a, struct s0 *b) { *b = *a; } // @@ -55,7 +55,7 @@ // CHECK-FUNCTIONS: ret i32 [[s1_load_y]] int s1_load_y(struct s1 *a) { return a->y; } // CHECK-FUNCTIONS-LABEL: define void @s1_copy -// CHECK-FUNCTIONS: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i32 1, i1 false) +// CHECK-FUNCTIONS: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false) void s1_copy(struct s1 *a, struct s1 *b) { *b = *a; } // @@ -83,7 +83,7 @@ // CHECK-FUNCTIONS: ret i32 [[s2_load_y]] int s2_load_y(struct s2 *a) { return a->y; } // CHECK-FUNCTIONS-LABEL: define void @s2_copy -// CHECK-FUNCTIONS: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i32 2, i1 false) +// CHECK-FUNCTIONS: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 {{.*}}, i8* align 2 {{.*}}, i64 8, i1 false) void s2_copy(struct s2 *a, struct s2 *b) { *b = *a; } struct __attribute__((packed, aligned)) s3 { Index: test/CodeGen/partial-reinitialization2.c =================================================================== --- test/CodeGen/partial-reinitialization2.c +++ test/CodeGen/partial-reinitialization2.c @@ -15,7 +15,7 @@ // CHECK-LABEL: test1 void test1(void) { - // CHECK: call void @llvm.memcpy{{.*}}%struct.P1, %struct.P1* @g1{{.*}}i64 6, i32 {{[0-9]}}, i1 false) + // CHECK: call void @llvm.memcpy{{.*}}%struct.P1, %struct.P1* @g1{{.*}}i64 6, i1 false) // CHECK: store i8 120, i8* % struct LP1 l = { .p1 = g1, .p1.x[2] = 'x' }; @@ -24,7 +24,7 @@ // CHECK-LABEL: test2 void test2(void) { - // CHECK: call void @llvm.memcpy{{.*}}%struct.P1, %struct.P1* @g1{{.*}}i64 6, i32 {{[0-9]}}, i1 false) + // CHECK: call void @llvm.memcpy{{.*}}%struct.P1, %struct.P1* @g1{{.*}}i64 6, i1 false) // CHECK: store i8 114, i8* % struct LP1 l = { .p1 = g1, .p1.x[1] = 'r' }; @@ -33,7 +33,7 @@ // CHECK-LABEL: test3 void test3(void) { - // CHECK: call void @llvm.memcpy{{.*}}%struct.P2* @g2{{.*}}i64 12, i32 {{[0-9]}}, i1 false) + // CHECK: call void @llvm.memcpy{{.*}}%struct.P2* @g2{{.*}}i64 12, i1 false) // CHECK: store i32 10, i32* % struct LP2 l = { .p2 = g2, .p2.b = 10 }; @@ -66,7 +66,7 @@ // CHECK: [[CALL:%[a-z0-9]+]] = call {{.*}}@get123() // CHECK: store{{.*}}[[CALL]], {{.*}}[[TMP0:%[a-z0-9]+]] // CHECK: [[TMP1:%[a-z0-9]+]] = bitcast {{.*}}[[TMP0]] - // CHECK: call void @llvm.memcpy{{.*}}[[TMP1]], i64 12, i32 {{[0-9]}}, i1 false) + // CHECK: call void @llvm.memcpy{{.*}}[[TMP1]], i64 12, i1 false) // CHECK: store i32 100, i32* % struct LUP2 { union UP2 up; } var = { get123(), .up.p2.a = 100 }; @@ -76,12 +76,12 @@ void test5(void) { // .l3 = g3 - // CHECK: call void @llvm.memcpy{{.*}}%struct.LP3, %struct.LP3* @g3{{.*}}i64 12, i32 {{[0-9]}}, i1 false) + // CHECK: call void @llvm.memcpy{{.*}}%struct.LP3, %struct.LP3* @g3{{.*}}i64 12, i1 false) // .l3.p1 = { [0] = g1 } implicitly sets [1] to zero - // CHECK: call void @llvm.memcpy{{.*}}%struct.P1, %struct.P1* @g1{{.*}}i64 6, i32 {{[0-9]}}, i1 false) + // CHECK: call void @llvm.memcpy{{.*}}%struct.P1, %struct.P1* @g1{{.*}}i64 6, i1 false) // CHECK: getelementptr{{.*}}%struct.P1, %struct.P1*{{.*}}i64 1 - // CHECK: call void @llvm.memset{{.*}}i8 0, i64 6, i32 {{[0-9]}}, i1 false) + // CHECK: call void @llvm.memset{{.*}}i8 0, i64 6, i1 false) // .l3.p1[1].x[1] = 'x' // CHECK: store i8 120, i8* % @@ -98,7 +98,7 @@ // CHECK: [[CALL:%[a-z0-9]+]] = call {{.*}}@get235() // CHECK: store{{.*}}[[CALL]], {{.*}}[[TMP0:%[a-z0-9]+]] // CHECK: [[TMP1:%[a-z0-9]+]] = bitcast {{.*}}[[TMP0]] - // CHECK: call void @llvm.memcpy{{.*}}[[TMP1]], i64 12, i32 {{[0-9]}}, i1 false) + // CHECK: call void @llvm.memcpy{{.*}}[[TMP1]], i64 12, i1 false) // CHECK: store i32 10, i32* % Index: test/CodeGen/ppc-varargs-struct.c =================================================================== --- test/CodeGen/ppc-varargs-struct.c +++ test/CodeGen/ppc-varargs-struct.c @@ -54,7 +54,7 @@ // CHECK-PPC-NEXT: [[AGGR:%[a-z0-9]+]] = load %struct.x*, %struct.x** [[VAARG_ADDR]] // CHECK-PPC-NEXT: [[DEST:%[0-9]+]] = bitcast %struct.x* %t to i8* // CHECK-PPC-NEXT: [[SRC:%.+]] = bitcast %struct.x* [[AGGR]] to i8* -// CHECK-PPC-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[DEST]], i8* [[SRC]], i32 16, i32 8, i1 false) +// CHECK-PPC-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[DEST]], i8* align 8 [[SRC]], i32 16, i1 false) int v = va_arg (ap, int); Index: test/CodeGen/ppc64-align-struct.c =================================================================== --- test/CodeGen/ppc64-align-struct.c +++ test/CodeGen/ppc64-align-struct.c @@ -56,7 +56,7 @@ // CHECK: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test1* // CHECK: [[DEST:%.*]] = bitcast %struct.test1* %y to i8* // CHECK: [[SRC:%.*]] = bitcast %struct.test1* [[T0]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 8, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[DEST]], i8* align 8 [[SRC]], i64 8, i1 false) struct test1 test1va (int x, ...) { struct test1 y; @@ -79,7 +79,7 @@ // CHECK: [[T0:%.*]] = bitcast i8* %[[ALIGN]] to %struct.test2* // CHECK: [[DEST:%.*]] = bitcast %struct.test2* %y to i8* // CHECK: [[SRC:%.*]] = bitcast %struct.test2* [[T0]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 16, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[DEST]], i8* align 16 [[SRC]], i64 16, i1 false) struct test2 test2va (int x, ...) { struct test2 y; @@ -102,7 +102,7 @@ // CHECK: [[T0:%.*]] = bitcast i8* %[[ALIGN]] to %struct.test3* // CHECK: [[DEST:%.*]] = bitcast %struct.test3* %y to i8* // CHECK: [[SRC:%.*]] = bitcast %struct.test3* [[T0]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 32 [[DEST]], i8* align 16 [[SRC]], i64 32, i1 false) struct test3 test3va (int x, ...) { struct test3 y; @@ -121,7 +121,7 @@ // CHECK: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test4* // CHECK: [[DEST:%.*]] = bitcast %struct.test4* %y to i8* // CHECK: [[SRC:%.*]] = bitcast %struct.test4* [[T0]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 12, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[DEST]], i8* align 8 [[SRC]], i64 12, i1 false) struct test4 test4va (int x, ...) { struct test4 y; @@ -140,7 +140,7 @@ // CHECK: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test_longdouble* // CHECK: [[DEST:%.*]] = bitcast %struct.test_longdouble* %y to i8* // CHECK: [[SRC:%.*]] = bitcast %struct.test_longdouble* [[T0]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[DEST]], i8* align 8 [[SRC]], i64 16, i1 false) struct test_longdouble { long double x; }; struct test_longdouble testva_longdouble (int x, ...) { @@ -164,7 +164,7 @@ // CHECK: [[T0:%.*]] = bitcast i8* %[[ALIGN]] to %struct.test_vector* // CHECK: [[DEST:%.*]] = bitcast %struct.test_vector* %y to i8* // CHECK: [[SRC:%.*]] = bitcast %struct.test_vector* [[T0]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 16, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[DEST]], i8* align 16 [[SRC]], i64 16, i1 false) struct test_vector { vector int x; }; struct test_vector testva_vector (int x, ...) { Index: test/CodeGen/ppc64-soft-float.c =================================================================== --- test/CodeGen/ppc64-soft-float.c +++ test/CodeGen/ppc64-soft-float.c @@ -92,7 +92,7 @@ // CHECK-BE: %[[TMP0:[^ ]+]] = alloca %struct.f3, align 4 // CHECK: %[[TMP1:[^ ]+]] = alloca [2 x i64] // CHECK: %[[TMP2:[^ ]+]] = bitcast [2 x i64]* %[[TMP1]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f3* @global_f3 to i8*), i64 12, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.f3* @global_f3 to i8*), i64 12, i1 false) // CHECK: %[[TMP3:[^ ]+]] = load [2 x i64], [2 x i64]* %[[TMP1]] // CHECK-LE: call { i64, i64 } @func_f3([2 x i64] %[[TMP3]]) // CHECK-BE: call void @func_f3(%struct.f3* sret %[[TMP0]], [2 x i64] %[[TMP3]]) @@ -111,7 +111,7 @@ // CHECK: %[[TMP0:[^ ]+]] = alloca %struct.f5, align 4 // CHECK: %[[TMP1:[^ ]+]] = alloca [3 x i64] // CHECK: %[[TMP2:[^ ]+]] = bitcast [3 x i64]* %[[TMP1]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f5* @global_f5 to i8*), i64 20, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.f5* @global_f5 to i8*), i64 20, i1 false) // CHECK: %[[TMP3:[^ ]+]] = load [3 x i64], [3 x i64]* %[[TMP1]] // CHECK: call void @func_f5(%struct.f5* sret %[[TMP0]], [3 x i64] %[[TMP3]]) struct f5 global_f5; @@ -128,7 +128,7 @@ // CHECK: %[[TMP0:[^ ]+]] = alloca %struct.f7, align 4 // CHECK: %[[TMP1:[^ ]+]] = alloca [4 x i64], align 8 // CHECK: %[[TMP2:[^ ]+]] = bitcast [4 x i64]* %[[TMP1]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f7* @global_f7 to i8*), i64 28, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.f7* @global_f7 to i8*), i64 28, i1 false) // CHECK: %[[TMP3:[^ ]+]] = load [4 x i64], [4 x i64]* %[[TMP1]], align 8 // CHECK: call void @func_f7(%struct.f7* sret %[[TMP0]], [4 x i64] %[[TMP3]]) struct f7 global_f7; @@ -144,7 +144,7 @@ // CHECK-LABEL: @call_f9 // CHECK: %[[TMP1:[^ ]+]] = alloca [5 x i64] // CHECK: %[[TMP2:[^ ]+]] = bitcast [5 x i64]* %[[TMP1]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.f9* @global_f9 to i8*), i64 36, i1 false) // CHECK: %[[TMP3:[^ ]+]] = load [5 x i64], [5 x i64]* %[[TMP1]] // CHECK: call void @func_f9(%struct.f9* sret %{{[^ ]+}}, [5 x i64] %[[TMP3]]) struct f9 global_f9; @@ -162,7 +162,7 @@ // CHECK-BE: %[[TMPX:[^ ]+]] = alloca %struct.fabc, align 4 // CHECK: %[[TMP0:[^ ]+]] = alloca [2 x i64], align 8 // CHECK: %[[TMP2:[^ ]+]] = bitcast [2 x i64]* %[[TMP0]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.fabc* @global_fabc to i8*), i64 12, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.fabc* @global_fabc to i8*), i64 12, i1 false) // CHECK: %[[TMP3:[^ ]+]] = load [2 x i64], [2 x i64]* %[[TMP0]], align 8 // CHECK-LE: %call = call { i64, i64 } @func_fabc([2 x i64] %[[TMP3]]) // CHECK-BE: call void @func_fabc(%struct.fabc* sret %[[TMPX]], [2 x i64] %[[TMP3]]) Index: test/CodeGen/ppc64le-aggregates.c =================================================================== --- test/CodeGen/ppc64le-aggregates.c +++ test/CodeGen/ppc64le-aggregates.c @@ -104,7 +104,7 @@ // CHECK-LABEL: @call_f9 // CHECK: %[[TMP1:[^ ]+]] = alloca [5 x i64] // CHECK: %[[TMP2:[^ ]+]] = bitcast [5 x i64]* %[[TMP1]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.f9* @global_f9 to i8*), i64 36, i1 false) // CHECK: %[[TMP3:[^ ]+]] = load [5 x i64], [5 x i64]* %[[TMP1]] // CHECK: call void @func_f9(%struct.f9* sret %{{[^ ]+}}, [5 x i64] %[[TMP3]]) struct f9 global_f9; Index: test/CodeGen/sparc-vaarg.c =================================================================== --- test/CodeGen/sparc-vaarg.c +++ test/CodeGen/sparc-vaarg.c @@ -19,7 +19,7 @@ // CHECK-LABEL: define void @get_struct // CHECK: [[RESULT:%[a-z_0-9]+]] = va_arg {{.*}}, %struct.Foo*{{$}} // CHECK: [[RESULT2:%[a-z_0-9]+]] = bitcast {{.*}} [[RESULT]] to i8* -// CHECK: call void @llvm.memcpy{{.*}}@dest{{.*}}, i8* [[RESULT2]] +// CHECK: call void @llvm.memcpy{{.*}}@dest{{.*}}, i8* align {{[0-9]+}} [[RESULT2]] void get_struct(va_list *args) { dest = va_arg(*args, struct Foo); } Index: test/CodeGen/tbaa-struct.cpp =================================================================== --- test/CodeGen/tbaa-struct.cpp +++ test/CodeGen/tbaa-struct.cpp @@ -12,7 +12,7 @@ *a = *b; } -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 16, i32 4, i1 false), !tbaa.struct [[TS:!.*]] +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i64 16, i1 false), !tbaa.struct [[TS:!.*]] struct B { char c1; @@ -24,7 +24,7 @@ *a = *b; } -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 24, i32 4, i1 false), !tbaa.struct [[TS2:!.*]] +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i64 24, i1 false), !tbaa.struct [[TS2:!.*]] typedef _Complex int T2; typedef _Complex char T5; @@ -36,7 +36,7 @@ *a = *b; } -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 12, i32 4, i1 false), !tbaa.struct [[TS3:!.*]] +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i64 12, i1 false), !tbaa.struct [[TS3:!.*]] // Make sure that zero-length bitfield works. #define ATTR __attribute__ ((ms_struct)) @@ -49,7 +49,7 @@ void copy4(struct five *a, struct five *b) { *a = *b; } -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 3, i32 1, i1 false), !tbaa.struct [[TS4:!.*]] +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 3, i1 false), !tbaa.struct [[TS4:!.*]] struct six { char a; @@ -60,7 +60,7 @@ void copy5(struct six *a, struct six *b) { *a = *b; } -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 6, i32 1, i1 false), !tbaa.struct [[TS5:!.*]] +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 6, i1 false), !tbaa.struct [[TS5:!.*]] // CHECK: [[TS]] = !{i64 0, i64 2, !{{.*}}, i64 4, i64 4, !{{.*}}, i64 8, i64 1, !{{.*}}, i64 12, i64 4, !{{.*}}} // CHECK: [[CHAR:!.*]] = !{!"omnipotent char", !{{.*}}} Index: test/CodeGen/volatile.c =================================================================== --- test/CodeGen/volatile.c +++ test/CodeGen/volatile.c @@ -199,7 +199,7 @@ // CHECK: store volatile i32 {{.*}}, i32* @vtS (void)vF2; // From vF2 to a temporary -// CHECK: call void @llvm.memcpy.{{.*}}(i8* %{{.*}}, i8* {{.*}} @vF2 {{.*}}, i1 true) +// CHECK: call void @llvm.memcpy.{{.*}}(i8* align {{[0-9]+}} %{{.*}}, i8* {{.*}} @vF2 {{.*}}, i1 true) vF2 = vF2; // vF2 to itself // CHECK: call void @llvm.memcpy.{{.*}}(i8* {{.*@vF2.*}}, i8* {{.*@vF2.*}}, i1 true) @@ -209,6 +209,6 @@ // CHECK: call void @llvm.memcpy.{{.*}}(i8* {{.*@vF2.*}}, i8* {{.*@vF2.*}}, i1 true) vF2 = (vF2, vF2); // vF2 to a temporary, then vF2 to itself -// CHECK: call void @llvm.memcpy.{{.*}}(i8* %{{.*}}, i8* {{.*@vF2.*}}, i1 true) +// CHECK: call void @llvm.memcpy.{{.*}}(i8* align {{[0-9]+}} %{{.*}}, i8* {{.*@vF2.*}}, i1 true) // CHECK: call void @llvm.memcpy.{{.*}}(i8* {{.*@vF2.*}}, i8* {{.*@vF2.*}}, i1 true) } Index: test/CodeGen/wasm-varargs.c =================================================================== --- test/CodeGen/wasm-varargs.c +++ test/CodeGen/wasm-varargs.c @@ -93,11 +93,11 @@ // CHECK: [[R3:%[^,=]+]] = bitcast i8* [[ARGP_CUR]] to [[STRUCT_S]]* // CHECK: [[R4:%[^,=]+]] = bitcast [[STRUCT_S]]* [[V]] to i8* // CHECK: [[R5:%[^,=]+]] = bitcast [[STRUCT_S]]* [[R3]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[R4]], i8* [[R5]], i32 12, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[R4]], i8* align 4 [[R5]], i32 12, i1 false) // CHECK: [[VA2:%[^,=]+]] = bitcast i8** [[VA]] to i8* // CHECK: call void @llvm.va_end(i8* [[VA2]]) // CHECK: [[R6:%[^,=]+]] = bitcast [[STRUCT_S]]* %agg.result to i8* // CHECK: [[R7:%[^,=]+]] = bitcast [[STRUCT_S]]* [[V]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[R6]], i8* [[R7]], i32 12, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[R6]], i8* align 4 [[R7]], i32 12, i1 false) // CHECK: ret void // CHECK: } Index: test/CodeGen/windows-swiftcall.c =================================================================== --- test/CodeGen/windows-swiftcall.c +++ test/CodeGen/windows-swiftcall.c @@ -207,10 +207,10 @@ // CHECK: [[RET:%.*]] = alloca [[STRUCT:%.*]], align 1 // CHECK: [[RES:%.*]] = alloca [[STRUCT]], align 1 // CHECK: [[CAST:%.*]] = bitcast [[STRUCT]]* [[RES]] to i8* -// CHECK: call void @llvm.memset{{.*}}(i8* [[CAST]], i8 0, i64 5 +// CHECK: call void @llvm.memset{{.*}}(i8* align 1 [[CAST]], i8 0, i64 5 // CHECK: [[CASTRET:%.*]] = bitcast [[STRUCT]]* [[RET]] to i8* // CHECK: [[CASTRES:%.*]] = bitcast [[STRUCT]]* [[RES]] to i8* -// CHECK: call void @llvm.memcpy{{.*}}(i8* [[CASTRET]], i8* [[CASTRES]], i64 5 +// CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} [[CASTRET]], i8* align {{[0-9]+}} [[CASTRES]], i64 5 // CHECK: [[CAST:%.*]] = bitcast [[STRUCT]]* [[RET]] to { i64 }* // CHECK: [[GEP:%.*]] = getelementptr inbounds { i64 }, { i64 }* [[CAST]], i32 0, i32 0 // CHECK: [[R0:%.*]] = load i64, i64* [[GEP]], align 1 @@ -260,10 +260,10 @@ // CHECK: [[RET:%.*]] = alloca [[UNION:%.*]], align 8 // CHECK: [[RES:%.*]] = alloca [[UNION]], align 8 // CHECK: [[CAST:%.*]] = bitcast [[UNION]]* [[RES]] to i8* -// CHECK: call void @llvm.memcpy{{.*}}(i8* [[CAST]] +// CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} [[CAST]] // CHECK: [[CASTRET:%.*]] = bitcast [[UNION]]* [[RET]] to i8* // CHECK: [[CASTRES:%.*]] = bitcast [[UNION]]* [[RES]] to i8* -// CHECK: call void @llvm.memcpy{{.*}}(i8* [[CASTRET]], i8* [[CASTRES]] +// CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} [[CASTRET]], i8* align {{[0-9]+}} [[CASTRES]] // CHECK: [[CAST:%.*]] = bitcast [[UNION]]* [[RET]] to { i64 }* // CHECK: [[GEP:%.*]] = getelementptr inbounds { i64 }, { i64 }* [[CAST]], i32 0, i32 0 // CHECK: [[R0:%.*]] = load i64, i64* [[GEP]], align 8 Index: test/CodeGen/x86-atomic-long_double.c =================================================================== --- test/CodeGen/x86-atomic-long_double.c +++ test/CodeGen/x86-atomic-long_double.c @@ -15,12 +15,12 @@ // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] // CHECK: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]], // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* - // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false) // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16 // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128* // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16 // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8* - // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false) // CHECK: store x86_fp80 [[INC_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16 // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128* // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16 @@ -46,10 +46,10 @@ // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] // CHECK32: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]], // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* - // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false) // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 4 // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8* - // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false) // CHECK32: store x86_fp80 [[INC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4 // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* @@ -77,12 +77,12 @@ // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] // CHECK: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]], // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* - // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false) // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16 // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128* // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16 // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8* - // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false) // CHECK: store x86_fp80 [[DEC_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16 // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128* // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16 @@ -108,10 +108,10 @@ // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] // CHECK32: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]], // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* - // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false) // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 4 // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8* - // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false) // CHECK32: store x86_fp80 [[DEC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4 // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* @@ -140,12 +140,12 @@ // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] // CHECK: [[SUB_VALUE:%.+]] = fsub x86_fp80 [[OLD_VALUE]], // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* - // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false) // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16 // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128* // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16 // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8* - // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false) // CHECK: store x86_fp80 [[SUB_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16 // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128* // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16 @@ -177,10 +177,10 @@ // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] // CHECK32: [[INC_VALUE:%.+]] = fsub x86_fp80 [[OLD_VALUE]], // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* - // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false) // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 4 // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8* - // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false) // CHECK32: store x86_fp80 [[INC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4 // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* @@ -203,7 +203,7 @@ // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8 // CHECK: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 8 // CHECK: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR:%.+]] to i8* - // CHECK: call void @llvm.memset.p0i8.i64(i8* [[STORE_TEMP_VOID_PTR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[STORE_TEMP_VOID_PTR]], i8 0, i64 16, i1 false) // CHECK: store x86_fp80 {{.+}}, x86_fp80* [[STORE_TEMP_PTR]], align 16 // CHECK: [[STORE_TEMP_INT_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR]] to i128* // CHECK: [[STORE_TEMP_INT:%.+]] = load i128, i128* [[STORE_TEMP_INT_PTR]], align 16 @@ -213,7 +213,7 @@ // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4 // CHECK32: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 4 // CHECK32: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR:%.+]] to i8* - // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[STORE_TEMP_VOID_PTR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[STORE_TEMP_VOID_PTR]], i8 0, i64 12, i1 false) // CHECK32: store x86_fp80 {{.+}}, x86_fp80* [[STORE_TEMP_PTR]], align 4 // CHECK32: [[ADDR_VOID:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* // CHECK32: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR]] to i8* @@ -250,12 +250,12 @@ // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] // CHECK: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]], // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* - // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false) // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16 // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128* // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16 // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8* - // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false) // CHECK: store x86_fp80 [[INC_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16 // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128* // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16 @@ -281,10 +281,10 @@ // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] // CHECK32: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]], // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* - // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false) // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 4 // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8* - // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false) // CHECK32: store x86_fp80 [[INC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4 // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* @@ -311,12 +311,12 @@ // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] // CHECK: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]], // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* - // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false) // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16 // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128* // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16 // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8* - // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false) // CHECK: store x86_fp80 [[DEC_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16 // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128* // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16 @@ -342,10 +342,10 @@ // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] // CHECK32: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]], // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* - // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false) // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 4 // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8* - // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false) // CHECK32: store x86_fp80 [[DEC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4 // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* @@ -373,12 +373,12 @@ // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] // CHECK: [[SUB_VALUE:%.+]] = fsub x86_fp80 [[OLD_VALUE]], // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* - // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false) // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16 // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128* // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16 // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8* - // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i1 false) // CHECK: store x86_fp80 [[SUB_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16 // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128* // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16 @@ -409,10 +409,10 @@ // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] // CHECK32: [[INC_VALUE:%.+]] = fsub x86_fp80 [[OLD_VALUE]], // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* - // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false) // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 4 // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8* - // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i1 false) // CHECK32: store x86_fp80 [[INC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4 // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* @@ -435,7 +435,7 @@ // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8 // CHECK: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 8 // CHECK: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR:%.+]] to i8* - // CHECK: call void @llvm.memset.p0i8.i64(i8* [[STORE_TEMP_VOID_PTR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[STORE_TEMP_VOID_PTR]], i8 0, i64 16, i1 false) // CHECK: store x86_fp80 {{.+}}, x86_fp80* [[STORE_TEMP_PTR]], align 16 // CHECK: [[STORE_TEMP_INT_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR]] to i128* // CHECK: [[STORE_TEMP_INT:%.+]] = load i128, i128* [[STORE_TEMP_INT_PTR]], align 16 @@ -445,7 +445,7 @@ // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4 // CHECK32: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 4 // CHECK32: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR:%.+]] to i8* - // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[STORE_TEMP_VOID_PTR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: call void @llvm.memset.p0i8.i64(i8* align 4 [[STORE_TEMP_VOID_PTR]], i8 0, i64 12, i1 false) // CHECK32: store x86_fp80 {{.+}}, x86_fp80* [[STORE_TEMP_PTR]], align 4 // CHECK32: [[ADDR_VOID:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* // CHECK32: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR]] to i8* Index: test/CodeGen/x86_32-arguments-realign.c =================================================================== --- test/CodeGen/x86_32-arguments-realign.c +++ test/CodeGen/x86_32-arguments-realign.c @@ -2,7 +2,7 @@ // RUN: FileCheck < %t %s // CHECK-LABEL: define void @f0(%struct.s0* byval align 4) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 16, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %{{.*}}, i8* align 4 %{{.*}}, i32 16, i1 false) // CHECK: } struct s0 { long double a; }; void f0(struct s0 a0) { Index: test/CodeGen/x86_64-arguments.c =================================================================== --- test/CodeGen/x86_64-arguments.c +++ test/CodeGen/x86_64-arguments.c @@ -434,7 +434,7 @@ // CHECK-NEXT: [[CASTED_VALUE_ADDR:%.*]] = bitcast i8* [[VALUE_ADDR]] to [[STRUCT_TEST51]] // CHECK-NEXT: [[CASTED_TMP_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[TMP_ADDR]] to i8* // CHECK-NEXT: [[RECASTED_VALUE_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[CASTED_VALUE_ADDR]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[CASTED_TMP_ADDR]], i8* [[RECASTED_VALUE_ADDR]], i64 16, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[CASTED_TMP_ADDR]], i8* align 8 [[RECASTED_VALUE_ADDR]], i64 16, i1 false) // CHECK-NEXT: add i32 {{.*}}, 16 // CHECK-NEXT: store i32 {{.*}}, i32* {{.*}} // CHECK-NEXT: br label Index: test/CodeGen/xcore-abi.c =================================================================== --- test/CodeGen/xcore-abi.c +++ test/CodeGen/xcore-abi.c @@ -80,7 +80,7 @@ // CHECK: store i8* [[IN]], i8** [[AP]] // CHECK: [[V1:%[a-z0-9]+]] = bitcast %struct.x* [[V:%[a-z0-9]+]] to i8* // CHECK: [[P1:%[a-z0-9]+]] = bitcast %struct.x* [[P]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[V1]], i8* [[P1]], i32 20, i32 4, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[V1]], i8* align 4 [[P1]], i32 20, i1 false) // CHECK: [[V2:%[a-z0-9]+]] = bitcast %struct.x* [[V]] to i8* // CHECK: call void @f(i8* [[V2]]) @@ -93,7 +93,7 @@ // CHECK: store i8* [[IN]], i8** [[AP]] // CHECK: [[V1:%[a-z0-9]+]] = bitcast [4 x i32]* [[V0:%[a-z0-9]+]] to i8* // CHECK: [[P1:%[a-z0-9]+]] = bitcast [4 x i32]* [[P]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[V1]], i8* [[P1]], i32 16, i32 4, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[V1]], i8* align 4 [[P1]], i32 16, i1 false) // CHECK: [[V2:%[a-z0-9]+]] = getelementptr inbounds [4 x i32], [4 x i32]* [[V0]], i32 0, i32 0 // CHECK: store i32* [[V2]], i32** [[V:%[a-z0-9]+]], align 4 // CHECK: [[V3:%[a-z0-9]+]] = load i32*, i32** [[V]], align 4 Index: test/CodeGenCXX/alignment.cpp =================================================================== --- test/CodeGenCXX/alignment.cpp +++ test/CodeGenCXX/alignment.cpp @@ -204,7 +204,7 @@ // CHECK: [[ARRAY_P:%.*]] = getelementptr inbounds [[A]], [[A]]* [[A_P]], i32 0, i32 0 // CHECK: [[T0:%.*]] = bitcast [[ARRAY]]* [[RESULT]] to i8* // CHECK: [[T1:%.*]] = bitcast [[ARRAY]]* [[ARRAY_P]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 64 [[T0]], i8* align 16 [[T1]], i64 16, i1 false) AlignedArray result = a.aArray; } @@ -223,7 +223,7 @@ // CHECK: [[ARRAY_P:%.*]] = getelementptr inbounds [[A]], [[A]]* [[A_P]], i32 0, i32 0 // CHECK: [[T0:%.*]] = bitcast [[ARRAY]]* [[RESULT]] to i8* // CHECK: [[T1:%.*]] = bitcast [[ARRAY]]* [[ARRAY_P]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 64 [[T0]], i8* align 16 [[T1]], i64 16, i1 false) AlignedArray result = b.aArray; } @@ -234,7 +234,7 @@ // CHECK: [[ARRAY_P:%.*]] = getelementptr inbounds [[B]], [[B]]* [[B_P]], i32 0, i32 2 // CHECK: [[T0:%.*]] = bitcast [[ARRAY]]* [[RESULT]] to i8* // CHECK: [[T1:%.*]] = bitcast [[ARRAY]]* [[ARRAY_P]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 16, i32 8, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 64 [[T0]], i8* align 8 [[T1]], i64 16, i1 false) AlignedArray result = b.bArray; } @@ -245,7 +245,7 @@ // CHECK: [[ARRAY_P:%.*]] = getelementptr inbounds [[B]], [[B]]* [[B_P]], i32 0, i32 2 // CHECK: [[T0:%.*]] = bitcast [[ARRAY]]* [[RESULT]] to i8* // CHECK: [[T1:%.*]] = bitcast [[ARRAY]]* [[ARRAY_P]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 16, i32 8, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 64 [[T0]], i8* align 8 [[T1]], i64 16, i1 false) AlignedArray result = b->bArray; } @@ -256,7 +256,7 @@ // CHECK: [[ARRAY_P:%.*]] = getelementptr inbounds [[B]], [[B]]* [[B_P]], i32 0, i32 2 // CHECK: [[T0:%.*]] = bitcast [[ARRAY]]* [[RESULT]] to i8* // CHECK: [[T1:%.*]] = bitcast [[ARRAY]]* [[ARRAY_P]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 64 [[T0]], i8* align 16 [[T1]], i64 16, i1 false) B b; AlignedArray result = b.bArray; } @@ -277,7 +277,7 @@ // CHECK: [[ARRAY_P:%.*]] = getelementptr inbounds [[A]], [[A]]* [[A_P]], i32 0, i32 0 // CHECK: [[T0:%.*]] = bitcast [[ARRAY]]* [[RESULT]] to i8* // CHECK: [[T1:%.*]] = bitcast [[ARRAY]]* [[ARRAY_P]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 16, i32 16, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 64 [[T0]], i8* align 16 [[T1]], i64 16, i1 false) D d; AlignedArray result = d.aArray; } @@ -292,7 +292,7 @@ // CHECK: [[ARRAY_P:%.*]] = getelementptr inbounds [[B]], [[B]]* [[B_P]], i32 0, i32 2 // CHECK: [[T0:%.*]] = bitcast [[ARRAY]]* [[RESULT]] to i8* // CHECK: [[T1:%.*]] = bitcast [[ARRAY]]* [[ARRAY_P]] to i8* - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 16, i32 8, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 64 [[T0]], i8* align 8 [[T1]], i64 16, i1 false) D d; AlignedArray result = d.bArray; } Index: test/CodeGenCXX/assign-construct-memcpy.cpp =================================================================== --- test/CodeGenCXX/assign-construct-memcpy.cpp +++ test/CodeGenCXX/assign-construct-memcpy.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple x86_64-apple-darwin12 -emit-llvm -o - -std=c++11 %s -DPOD | FileCheck %s -check-prefix=CHECK-POD +/// RUN: %clang_cc1 -triple x86_64-apple-darwin12 -emit-llvm -o - -std=c++11 %s -DPOD | FileCheck %s -check-prefix=CHECK-POD // RUN: %clang_cc1 -triple x86_64-apple-darwin12 -emit-llvm -o - -std=c++11 %s | FileCheck %s -check-prefix=CHECK-NONPOD // Declare the reserved placement operators. @@ -23,47 +23,47 @@ foo *test1(void *f, const foo &x) { return new (f) foo(x); // CHECK-POD: test1 -// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 +// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 24 // CHECK-NONPOD: test1 -// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 +// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 24 } foo *test2(const foo &x) { return new foo(x); // CHECK-POD: test2 -// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 +// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 16 {{.*}} align 8 {{.*}}i64 24 // CHECK-NONPOD: test2 -// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 +// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 16 {{.*}} align 8 {{.*}}i64 24 } foo test3(const foo &x) { foo f = x; return f; // CHECK-POD: test3 -// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 +// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 24 // CHECK-NONPOD: test3 -// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 +// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 24 } foo *test4(foo &&x) { return new foo(x); // CHECK-POD: test4 -// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 +// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 16 {{.*}} align 8 {{.*}}i64 24 // CHECK-NONPOD: test4 -// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 +// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 16 {{.*}} align 8 {{.*}}i64 24 } void test5(foo &f, const foo &x) { f = x; // CHECK-POD: test5 -// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 +// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 24 // CHECK-NONPOD: test5 -// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 17, i32 8 +// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 17 } extern foo globtest; @@ -71,10 +71,10 @@ void test6(foo &&x) { globtest = move(x); // CHECK-POD: test6 -// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 +// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 24 // CHECK-NONPOD: test6 -// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 17, i32 8 +// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 17 } void byval(foo f); @@ -82,8 +82,8 @@ void test7(const foo &x) { byval(x); // CHECK-POD: test7 -// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 +// CHECK-POD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 24 // CHECK-NONPOD: test7 -// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 24, i32 8 +// CHECK-NONPOD: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 24 } Index: test/CodeGenCXX/constructor-direct-call.cpp =================================================================== --- test/CodeGenCXX/constructor-direct-call.cpp +++ test/CodeGenCXX/constructor-direct-call.cpp @@ -9,7 +9,7 @@ Test1 var; var.Test1::Test1(); - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 4, i32 4, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i32 4, i1 false) var.Test1::Test1(var); } @@ -28,7 +28,7 @@ // CHECK-NEXT: call x86_thiscallcc void @_ZN5Test2C1Ev(%class.Test2* %var) var.Test2::Test2(); - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 8, i32 4, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i32 8, i1 false) var.Test2::Test2(var); } Index: test/CodeGenCXX/copy-constructor-elim.cpp =================================================================== --- test/CodeGenCXX/copy-constructor-elim.cpp +++ test/CodeGenCXX/copy-constructor-elim.cpp @@ -56,4 +56,4 @@ // Make sure that we obey the destination's alignment requirements when emitting // the copy. // CHECK-LABEL: define {{.*}} @f( -// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}({{.*}}, i8* bitcast (%struct.V* @gv1 to i8*), {{i64|i32}} 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}({{.*}}align 4{{.*}}, i8* align 8 bitcast (%struct.V* @gv1 to i8*), {{i64|i32}} 4, i1 false) Index: test/CodeGenCXX/copy-constructor-synthesis-2.cpp =================================================================== --- test/CodeGenCXX/copy-constructor-synthesis-2.cpp +++ test/CodeGenCXX/copy-constructor-synthesis-2.cpp @@ -10,15 +10,15 @@ PR23373 pr23373_b(pr23373_a); // CHECK-LABEL: define {{.*}} @__cxx_global_var_init( -// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i32|i64}}({{.*}} @pr23373_b{{.*}}, {{.*}} @pr23373_a{{.*}}, [[W:i32|i64]] 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i32|i64}}({{.*}}align 4{{.*}}@pr23373_b{{.*}}, {{.*}}align 4{{.*}} @pr23373_a{{.*}}, [[W:i32|i64]] 4, i1 false) PR23373 pr23373_f() { return pr23373_a; } // CHECK-LABEL: define {{.*}} @_Z9pr23373_fv( -// CHECK: call void @llvm.memcpy.p0i8.p0i8.[[W]]({{.*}}, [[W]] 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.[[W]]({{.*}}align 4{{.*}}align 4{{.*}}, [[W]] 4, i1 false) void pr23373_g(PR23373 &a, PR23373 &b) { a = b; } // CHECK-LABEL: define {{.*}} @_Z9pr23373_g -// CHECK: call void @llvm.memcpy.p0i8.p0i8.[[W]]({{.*}}, [[W]] 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.[[W]]({{.*}}align 4{{.*}}align 4{{.*}}, [[W]] 4, i1 false) struct A { virtual void a(); }; A x(A& y) { return y; } Index: test/CodeGenCXX/copy-constructor-synthesis.cpp =================================================================== --- test/CodeGenCXX/copy-constructor-synthesis.cpp +++ test/CodeGenCXX/copy-constructor-synthesis.cpp @@ -143,7 +143,7 @@ // CHECK-NEXT: [[T2:%.*]] = getelementptr inbounds [[A]], [[A]]* [[OTHER]], i32 0, i32 1 // CHECK-NEXT: [[T4:%.*]] = bitcast i16* [[T0]] to i8* // CHECK-NEXT: [[T5:%.*]] = bitcast i16* [[T2]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T4]], i8* [[T5]], i64 8, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[T4]], i8* align 8 [[T5]], i64 8, i1 false) // CHECK-NEXT: ret [[A]]* [[THIS]] // CHECK-LABEL: define linkonce_odr void @_ZN6PR66281BC2ERKS0_(%"struct.PR6628::B"* %this, %"struct.PR6628::B"* dereferenceable({{[0-9]+}})) unnamed_addr @@ -172,7 +172,7 @@ // CHECK-NEXT: [[T2:%.*]] = getelementptr inbounds [[A]], [[A]]* [[OTHER]], i32 0, i32 1 // CHECK-NEXT: [[T4:%.*]] = bitcast i16* [[T0]] to i8* // CHECK-NEXT: [[T5:%.*]] = bitcast i16* [[T2]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T4]], i8* [[T5]], i64 8, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[T4]], i8* align 8 [[T5]], i64 8, i1 false) // CHECK-NEXT: ret void } Index: test/CodeGenCXX/cxx0x-delegating-ctors.cpp =================================================================== --- test/CodeGenCXX/cxx0x-delegating-ctors.cpp +++ test/CodeGenCXX/cxx0x-delegating-ctors.cpp @@ -66,7 +66,7 @@ X::X(int) : X() {} } // CHECK: define {{.*}} @_ZN7PR128901XC1Ei(%"class.PR12890::X"* %this, i32) -// CHECK: call void @llvm.memset.p0i8.{{i32|i64}}(i8* {{.*}}, i8 0, {{i32|i64}} 4, i32 4, i1 false) +// CHECK: call void @llvm.memset.p0i8.{{i32|i64}}(i8* align 4 {{.*}}, i8 0, {{i32|i64}} 4, i1 false) namespace PR14588 { void other(); Index: test/CodeGenCXX/cxx0x-initializer-array.cpp =================================================================== --- test/CodeGenCXX/cxx0x-initializer-array.cpp +++ test/CodeGenCXX/cxx0x-initializer-array.cpp @@ -39,7 +39,7 @@ // CHECK: store i32 -1, Agg2 b = { n }; - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* bitcast ([3 x i32]* @[[THREE_NULL_MEMPTRS]] to i8*), i32 12, i32 4, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %{{.*}}, i8* align 4 bitcast ([3 x i32]* @[[THREE_NULL_MEMPTRS]] to i8*), i32 12, i1 false) } // Test dynamic initialization. Index: test/CodeGenCXX/cxx11-initializer-array-new.cpp =================================================================== --- test/CodeGenCXX/cxx11-initializer-array-new.cpp +++ test/CodeGenCXX/cxx11-initializer-array-new.cpp @@ -154,7 +154,7 @@ // // CHECK: %[[SIZE:.*]] = sub i64 %{{.*}}, 24 // CHECK: %[[REST:.*]] = bitcast %[[T]]* %[[T_2_AS_T]] to i8* -// CHECK: call void @llvm.memset.p0i8.i64(i8* %[[REST]], i8 0, i64 %[[SIZE]], i32 4, i1 false) +// CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %[[REST]], i8 0, i64 %[[SIZE]], i1 false) // // CHECK: } Index: test/CodeGenCXX/cxx1z-lambda-star-this.cpp =================================================================== --- test/CodeGenCXX/cxx1z-lambda-star-this.cpp +++ test/CodeGenCXX/cxx1z-lambda-star-this.cpp @@ -16,7 +16,7 @@ //CHECK: %[[I1:.+]] = bitcast %struct.A* %[[I0]] to i8* //CHECK: %[[I2:.+]] = bitcast %struct.A* %this1 to i8* // copy the contents ... -//CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[I1]], i8* %[[I2]], i32 8, i32 8, i1 false) +//CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %[[I1]], i8* align 8 %[[I2]], i32 8, i1 false) struct B { double b = 222; Index: test/CodeGenCXX/eh.cpp =================================================================== --- test/CodeGenCXX/eh.cpp +++ test/CodeGenCXX/eh.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -fcxx-exceptions -fexceptions -triple x86_64-apple-darwin -std=c++11 -emit-llvm %s -o %t.ll -// RUN: FileCheck --input-file=%t.ll %s +// RUN: %clang_cc1 -fcxx-exceptions -fexceptions -triple x86_64-apple-darwin -std=c++11 -emit-llvm %s -o - \ +// RUN: | FileCheck %s struct test1_D { double d; @@ -13,7 +13,7 @@ // CHECK: [[EXNOBJ:%.*]] = call i8* @__cxa_allocate_exception(i64 8) // CHECK-NEXT: [[EXN:%.*]] = bitcast i8* [[EXNOBJ]] to [[DSTAR:%[^*]*\*]] // CHECK-NEXT: [[EXN2:%.*]] = bitcast [[DSTAR]] [[EXN]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[EXN2]], i8* bitcast ([[DSTAR]] @d1 to i8*), i64 8, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[EXN2]], i8* align 8 bitcast ([[DSTAR]] @d1 to i8*), i64 8, i1 false) // CHECK-NEXT: call void @__cxa_throw(i8* [[EXNOBJ]], i8* bitcast ({ i8*, i8* }* @_ZTI7test1_D to i8*), i8* null) [[NR:#[0-9]+]] // CHECK-NEXT: unreachable @@ -466,7 +466,7 @@ // CHECK: [[T0:%.*]] = call i8* @__cxa_allocate_exception(i64 16) // CHECK-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to %"class.test17::DerivedException"* // CHECK-NEXT: [[T2:%.*]] = bitcast %"class.test17::DerivedException"* [[T1]] to i8* - // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T2]], i8 0, i64 16, i32 16, i1 false) + // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 16 [[T2]], i8 0, i64 16, i1 false) } } Index: test/CodeGenCXX/float16-declarations.cpp =================================================================== --- test/CodeGenCXX/float16-declarations.cpp +++ test/CodeGenCXX/float16-declarations.cpp @@ -108,7 +108,7 @@ S1<_Float16> s1 = { 132.f16 }; // CHECK-DAG: @_ZZ4mainE2s1 = private unnamed_addr constant %struct.S1 { half 0xH5820 }, align 2 // CHECK-DAG: [[S1:%[0-9]+]] = bitcast %struct.S1* %{{.*}} to i8* -// CHECK-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[S1]], i8* bitcast (%struct.S1* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false) +// CHECK-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[S1]], i8* align 2 bitcast (%struct.S1* @_ZZ4mainE2s1 to i8*), i64 2, i1 false) _Float16 f4l = func1n(f1l) + func1f(f2l) + c1.func1c(f3l) + c1.func2c(f1l) + func1t(f1l) + s1.mem2 - f1n + f2n; Index: test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp =================================================================== --- test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp +++ test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp @@ -405,7 +405,7 @@ // WIN32: %[[argmem:[^ ]*]] = alloca inalloca [[argmem_ty]] // WIN32: %[[gep1:[^ ]*]] = getelementptr inbounds [[argmem_ty]], [[argmem_ty]]* %[[argmem]], i32 0, i32 1 // WIN32: %[[bc1:[^ ]*]] = bitcast %struct.SmallWithDtor* %[[gep1]] to i8* -// WIN32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[bc1]], i8* {{.*}}, i32 4, i32 4, i1 false) +// WIN32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %[[bc1]], i8* align 4 {{.*}}, i32 4, i1 false) // WIN32: %[[a2:[^ ]*]] = load void [[dst_ty]], void [[dst_ty]]* %[[a1]], align 4 // WIN32: %[[gep2:[^ ]*]] = getelementptr inbounds [[argmem_ty]], [[argmem_ty]]* %[[argmem]], i32 0, i32 0 // WIN32: %[[addr:[^ ]*]] = bitcast {}** %[[gep2]] to void [[dst_ty]]* Index: test/CodeGenCXX/microsoft-abi-virtual-inheritance.cpp =================================================================== --- test/CodeGenCXX/microsoft-abi-virtual-inheritance.cpp +++ test/CodeGenCXX/microsoft-abi-virtual-inheritance.cpp @@ -490,7 +490,7 @@ // CHECK: %[[B:.*]] = bitcast %"struct.test5::C"* %[[THIS]] to %"struct.test5::B"* // CHECK: %[[B_i8:.*]] = bitcast %"struct.test5::B"* %[[B]] to i8* // CHECK: %[[FIELD:.*]] = getelementptr inbounds i8, i8* %[[B_i8]], i32 4 -// CHECK: call void @llvm.memset.p0i8.i32(i8* %[[FIELD]], i8 0, i32 4, i32 4, i1 false) +// CHECK: call void @llvm.memset.p0i8.i32(i8* align 4 %[[FIELD]], i8 0, i32 4, i1 false) } namespace pr27621 { @@ -530,5 +530,5 @@ // CHECK: %[[C:.*]] = bitcast %"class.test6::D"* %[[THIS]] to %"class.test6::C"* // CHECK: %[[C_i8:.*]] = bitcast %"class.test6::C"* %[[C]] to i8* // CHECK: %[[FIELD:.*]] = getelementptr inbounds i8, i8* %[[C_i8]], i32 8 -// CHECK: call void @llvm.memset.p0i8.i32(i8* %[[FIELD]], i8 0, i32 4, i32 4, i1 false) +// CHECK: call void @llvm.memset.p0i8.i32(i8* align 4 %[[FIELD]], i8 0, i32 4, i1 false) } Index: test/CodeGenCXX/microsoft-uuidof.cpp =================================================================== --- test/CodeGenCXX/microsoft-uuidof.cpp +++ test/CodeGenCXX/microsoft-uuidof.cpp @@ -63,12 +63,12 @@ // CHECK: @_GUID_87654321_4321_4321_4321_ba0987654321 = linkonce_odr constant { i32, i16, i16, [8 x i8] } { i32 -2023406815, i16 17185, i16 17185, [8 x i8] c"C!\BA\09\87eC!" }, comdat // The static initializer for thing. -// CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast (%struct._GUID* @thing to i8*), i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ac to i8*), i32 16, i32 4, i1 false) -// CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast (%struct._GUID* @thing to i8*), i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ac to i8*), i32 4, i32 4, i1 false) +// CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast (%struct._GUID* @thing to i8*), i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ac to i8*), i32 16, i1 false) +// CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast (%struct._GUID* @thing to i8*), i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ac to i8*), i32 4, i1 false) // The static initializer for g. -// CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast (%struct._GUID* @g to i8*), i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 16, i32 4, i1 false) -// CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast (%struct._GUID* @g to i8*), i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 4, i32 4, i1 false) +// CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast (%struct._GUID* @g to i8*), i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 16, i1 false) +// CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast (%struct._GUID* @g to i8*), i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 4, i1 false) #ifdef DEFINE_GUID void fun() { @@ -81,20 +81,20 @@ // CHECK-DEFINE-GUID: [[U1:%.+]] = bitcast %struct._GUID* %s1_1 to i8* // CHECK-DEFINE-WRONG-GUID: [[U1:%.+]] = bitcast %struct._GUID* %s1_1 to i8* - // CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[U1]], i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 16, i32 4, i1 false) - // CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[U1]], i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 4, i32 4, i1 false) + // CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[U1]], i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 16, i1 false) + // CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[U1]], i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 4, i1 false) GUID s1_1 = __uuidof(S1); // CHECK-DEFINE-GUID: [[U2:%.+]] = bitcast %struct._GUID* %s1_2 to i8* // CHECK-DEFINE-WRONG-GUID: [[U2:%.+]] = bitcast %struct._GUID* %s1_2 to i8* - // CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[U2]], i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 16, i32 4, i1 false) - // CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[U2]], i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 4, i32 4, i1 false) + // CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[U2]], i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 16, i1 false) + // CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[U2]], i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 4, i1 false) GUID s1_2 = __uuidof(S1); // CHECK-DEFINE-GUID: [[U3:%.+]] = bitcast %struct._GUID* %s1_3 to i8* // CHECK-DEFINE-WRONG-GUID: [[U3:%.+]] = bitcast %struct._GUID* %s1_3 to i8* - // CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[U3]], i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 16, i32 4, i1 false) - // CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[U3]], i8* bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 4, i32 4, i1 false) + // CHECK-DEFINE-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[U3]], i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 16, i1 false) + // CHECK-DEFINE-WRONG-GUID: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[U3]], i8* align 4 bitcast ({ i32, i16, i16, [8 x i8] }* @_GUID_12345678_1234_1234_1234_1234567890ab to i8*), i32 4, i1 false) GUID s1_3 = __uuidof(s1); } #endif Index: test/CodeGenCXX/new-array-init.cpp =================================================================== --- test/CodeGenCXX/new-array-init.cpp +++ test/CodeGenCXX/new-array-init.cpp @@ -50,10 +50,10 @@ // FIXME: Conditionally throw an exception rather than passing -1 to alloc function // CHECK: select // CHECK: %[[PTR:.*]] = call i8* @_Zna{{.}}(i{{32|64}} - // CHECK: call void @llvm.memcpy{{.*}}(i8* %[[PTR]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @[[ABC4]], i32 0, i32 0), i32 4, + // CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %[[PTR]], i8* align {{[0-9]+}} getelementptr inbounds ([4 x i8], [4 x i8]* @[[ABC4]], i32 0, i32 0), i32 4, // CHECK: %[[REST:.*]] = getelementptr inbounds i8, i8* %[[PTR]], i32 4 // CHECK: %[[RESTSIZE:.*]] = sub {{.*}}, 4 - // CHECK: call void @llvm.memset{{.*}}(i8* %[[REST]], i8 0, i{{32|64}} %[[RESTSIZE]], + // CHECK: call void @llvm.memset{{.*}}(i8* align {{[0-9]+}} %[[REST]], i8 0, i{{32|64}} %[[RESTSIZE]], new char[n] { "abc" }; } @@ -61,7 +61,7 @@ void string_exact() { // CHECK-NOT: icmp // CHECK: %[[PTR:.*]] = call i8* @_Zna{{.}}(i{{32|64}} 4) - // CHECK: call void @llvm.memcpy{{.*}}(i8* %[[PTR]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @[[ABC4]], i32 0, i32 0), i32 4, + // CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %[[PTR]], i8* align {{[0-9]+}} getelementptr inbounds ([4 x i8], [4 x i8]* @[[ABC4]], i32 0, i32 0), i32 4, // CHECK-NOT: memset new char[4] { "abc" }; } @@ -71,7 +71,7 @@ // CHECK-NOT: icmp // CHECK: %[[PTR:.*]] = call i8* @_Zna{{.}}(i{{32|64}} 15) // FIXME: For very large arrays, it would be preferable to emit a small copy and a memset. - // CHECK: call void @llvm.memcpy{{.*}}(i8* %[[PTR]], i8* getelementptr inbounds ([15 x i8], [15 x i8]* @[[ABC15]], i32 0, i32 0), i32 15, + // CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %[[PTR]], i8* align {{[0-9]+}} getelementptr inbounds ([15 x i8], [15 x i8]* @[[ABC15]], i32 0, i32 0), i32 15, // CHECK-NOT: memset new char[15] { "abc" }; } @@ -113,7 +113,7 @@ // CHECK: %[[PTR2:.*]] = getelementptr inbounds %[[AGGR]], %[[AGGR]]* %[[PTR1]], i32 1{{$}} // CHECK: %[[REMAIN:.*]] = sub i32 {{.*}}, 16 // CHECK: %[[MEM:.*]] = bitcast %[[AGGR]]* %[[PTR2]] to i8* - // CHECK: call void @llvm.memset{{.*}}(i8* %[[MEM]], i8 0, i32 %[[REMAIN]], + // CHECK: call void @llvm.memset{{.*}}(i8* align {{[0-9]+}} %[[MEM]], i8 0, i32 %[[REMAIN]], struct Aggr { int a, b; }; new Aggr[n] { 1, 2, 3 }; } Index: test/CodeGenCXX/no-opt-volatile-memcpy.cpp =================================================================== --- test/CodeGenCXX/no-opt-volatile-memcpy.cpp +++ test/CodeGenCXX/no-opt-volatile-memcpy.cpp @@ -18,10 +18,10 @@ // CHECK: %[[LS:.*]] = alloca %struct.s, align 4 // CHECK-NEXT: %[[ZERO:.*]] = bitcast %struct.s* %[[LS]] to i8* // CHECK-NEXT: %[[ONE:.*]] = bitcast %struct.s* %[[LS]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* %[[ZERO]], i8* %[[ONE]], i64 132, i32 4, i1 true) -// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true) +// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* align 4 %[[ZERO]], i8* align 4 %[[ONE]], i64 132, i1 true) +// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i1 true) // CHECK-NEXT: %[[TWO:.*]] = bitcast %struct.s* %[[LS]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* %[[TWO]], i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true) +// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* align 4 %[[TWO]], i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i1 true) struct s1 { @@ -35,8 +35,8 @@ s.y = gs; } // CHECK-LABEL: define void @_Z3feev() -// CHECK: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true) -// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true) +// CHECK: call void @llvm.memcpy.{{.*}}(i8* align 4 getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* align 4 getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i64 132, i1 true) +// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* align 4 getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i1 true) struct d : s1 { }; @@ -47,4 +47,4 @@ gd = gd; } // CHECK-LABEL: define void @_Z4gorfv() -// CHECK: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.d, %struct.d* @gd, i32 0, i32 0, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.d, %struct.d* @gd, i32 0, i32 0, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true) +// CHECK: call void @llvm.memcpy.{{.*}}(i8* align 4 getelementptr inbounds (%struct.d, %struct.d* @gd, i32 0, i32 0, i32 0, i32 0, i32 0), i8* align 4 getelementptr inbounds (%struct.d, %struct.d* @gd, i32 0, i32 0, i32 0, i32 0, i32 0), i64 132, i1 true) Index: test/CodeGenCXX/pod-member-memcpys.cpp =================================================================== --- test/CodeGenCXX/pod-member-memcpys.cpp +++ test/CodeGenCXX/pod-member-memcpys.cpp @@ -116,59 +116,59 @@ // Basic copy-assignment: // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.Basic* @_ZN5BasicaSERKS_(%struct.Basic* %this, %struct.Basic* dereferenceable({{[0-9]+}})) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}}) // CHECK: call dereferenceable({{[0-9]+}}) %struct.NonPOD* @_ZN6NonPODaSERKS_ -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}}) // CHECK: ret %struct.Basic* // PODMember copy-assignment: // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.PODMember* @_ZN9PODMemberaSERKS_(%struct.PODMember* %this, %struct.PODMember* dereferenceable({{[0-9]+}})) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 32, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 32, i1 {{.*}}) // CHECK: call dereferenceable({{[0-9]+}}) %struct.NonPOD* @_ZN6NonPODaSERKS_ -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}}) // CHECK: ret %struct.PODMember* // PODLikeMember copy-assignment: // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.PODLikeMember* @_ZN13PODLikeMemberaSERKS_(%struct.PODLikeMember* %this, %struct.PODLikeMember* dereferenceable({{[0-9]+}})) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 32, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 32, i1 {{.*}}) // CHECK: call dereferenceable({{[0-9]+}}) %struct.NonPOD* @_ZN6NonPODaSERKS_ -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}}) // CHECK: ret %struct.PODLikeMember* // ArrayMember copy-assignment: // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.ArrayMember* @_ZN11ArrayMemberaSERKS_(%struct.ArrayMember* %this, %struct.ArrayMember* dereferenceable({{[0-9]+}})) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 64, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 64, i1 {{.*}}) // CHECK: call dereferenceable({{[0-9]+}}) %struct.NonPOD* @_ZN6NonPODaSERKS_ -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 64, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 64, i1 {{.*}}) // CHECK: ret %struct.ArrayMember* // VolatileMember copy-assignment: // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.VolatileMember* @_ZN14VolatileMemberaSERKS_(%struct.VolatileMember* %this, %struct.VolatileMember* dereferenceable({{[0-9]+}})) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}}) // CHECK: load volatile i32, i32* {{.*}}, align 4 // CHECK: store volatile i32 {{.*}}, align 4 // CHECK: call dereferenceable({{[0-9]+}}) %struct.NonPOD* @_ZN6NonPODaSERKS_ -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}}) // CHECK: ret %struct.VolatileMember* // BitfieldMember copy-assignment: // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.BitfieldMember* @_ZN14BitfieldMemberaSERKS_(%struct.BitfieldMember* %this, %struct.BitfieldMember* dereferenceable({{[0-9]+}})) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}}) // CHECK: call dereferenceable({{[0-9]+}}) %struct.NonPOD* @_ZN6NonPODaSERKS_ -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 3, i32 1{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 1 {{.*}} align 1 {{.*}}i64 3, i1 {{.*}}) // CHECK: ret %struct.BitfieldMember* // InnerClass copy-assignment: // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.InnerClassMember* @_ZN16InnerClassMemberaSERKS_(%struct.InnerClassMember* %this, %struct.InnerClassMember* dereferenceable({{[0-9]+}})) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 32, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 32, i1 {{.*}}) // CHECK: call dereferenceable({{[0-9]+}}) %struct.NonPOD* @_ZN6NonPODaSERKS_ -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}}) // CHECK: ret %struct.InnerClassMember* // PackedMembers copy-assignment: // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.PackedMembers* @_ZN13PackedMembersaSERKS_(%struct.PackedMembers* %this, %struct.PackedMembers* dereferenceable({{[0-9]+}})) // CHECK: call dereferenceable({{[0-9]+}}) %struct.NonPOD* @_ZN6NonPODaSERKS_ -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 1{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 1 {{.*}} align 1 {{.*}}i64 16, i1 {{.*}}) // CHECK: ret %struct.PackedMembers* // COPY-CONSTRUCTORS: @@ -183,70 +183,70 @@ // PackedMembers copy-assignment: // CHECK-LABEL: define linkonce_odr void @_ZN13PackedMembersC2ERKS_(%struct.PackedMembers* %this, %struct.PackedMembers* dereferenceable({{[0-9]+}})) // CHECK: call void @_ZN6NonPODC1ERKS_ -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 1{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 1 {{.*}} align 1 {{.*}}i64 16, i1 {{.*}}) // CHECK: ret void CALL_CC(BitfieldMember2) // BitfieldMember2 copy-constructor: // CHECK-2-LABEL: define linkonce_odr void @_ZN15BitfieldMember2C2ERKS_(%struct.BitfieldMember2* %this, %struct.BitfieldMember2* dereferenceable({{[0-9]+}})) -// CHECK-2: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4, i1 false) +// CHECK-2: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 false) // CHECK-2: call void @_ZN6NonPODC1ERKS_ // CHECK-2: ret void CALL_CC(BitfieldMember3) // BitfieldMember3 copy-constructor: // CHECK-LABEL: define linkonce_odr void @_ZN15BitfieldMember3C2ERKS_(%struct.BitfieldMember3* %this, %struct.BitfieldMember3* dereferenceable({{[0-9]+}})) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 8, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 8, i1 false) // CHECK: ret void CALL_CC(ReferenceMember) // ReferenceMember copy-constructor: // CHECK-LABEL: define linkonce_odr void @_ZN15ReferenceMemberC2ERKS_(%struct.ReferenceMember* %this, %struct.ReferenceMember* dereferenceable({{[0-9]+}})) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 8{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 16, i1 {{.*}}) // CHECK: call void @_ZN6NonPODC1ERKS_ -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 8{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 8 {{.*}} align 8 {{.*}}i64 16, i1 {{.*}}) // CHECK: ret void CALL_CC(InnerClassMember) // InnerClass copy-constructor: // CHECK-LABEL: define linkonce_odr void @_ZN16InnerClassMemberC2ERKS_(%struct.InnerClassMember* %this, %struct.InnerClassMember* dereferenceable({{[0-9]+}})) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 32, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 32, i1 {{.*}}) // CHECK: call void @_ZN6NonPODC1ERKS_ -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}}) // CHECK: ret void CALL_CC(BitfieldMember) // BitfieldMember copy-constructor: // CHECK-LABEL: define linkonce_odr void @_ZN14BitfieldMemberC2ERKS_(%struct.BitfieldMember* %this, %struct.BitfieldMember* dereferenceable({{[0-9]+}})) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}}) // CHECK: call void @_ZN6NonPODC1ERKS_ -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 3, i32 1{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 1 {{.*}} align 1 {{.*}}i64 3, i1 {{.*}}) // CHECK: ret void CALL_CC(VolatileMember) // VolatileMember copy-constructor: // CHECK-LABEL: define linkonce_odr void @_ZN14VolatileMemberC2ERKS_(%struct.VolatileMember* %this, %struct.VolatileMember* dereferenceable({{[0-9]+}})) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}}) // CHECK: load volatile i32, i32* {{.*}}, align 4 // CHECK: store volatile i32 {{.*}}, align 4 // CHECK: call void @_ZN6NonPODC1ERKS_ -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}}) // CHECK: ret void CALL_CC(ArrayMember) // ArrayMember copy-constructor: // CHECK-LABEL: define linkonce_odr void @_ZN11ArrayMemberC2ERKS_(%struct.ArrayMember* %this, %struct.ArrayMember* dereferenceable({{[0-9]+}})) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 64, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 64, i1 {{.*}}) // CHECK: call void @_ZN6NonPODC1ERKS_ -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 64, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 64, i1 {{.*}}) // CHECK: ret void CALL_CC(PODLikeMember) // PODLikeMember copy-constructor: // CHECK-LABEL: define linkonce_odr void @_ZN13PODLikeMemberC2ERKS_(%struct.PODLikeMember* %this, %struct.PODLikeMember* dereferenceable({{[0-9]+}})) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 32, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 32, i1 {{.*}}) // CHECK: invoke void @_ZN6NonPODC1ERKS_ -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}}) // CHECK: ret void // CHECK: landingpad // CHECK: invoke void @_ZN7PODLikeD1Ev @@ -254,15 +254,15 @@ CALL_CC(PODMember) // PODMember copy-constructor: // CHECK-LABEL: define linkonce_odr void @_ZN9PODMemberC2ERKS_(%struct.PODMember* %this, %struct.PODMember* dereferenceable({{[0-9]+}})) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 32, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 32, i1 {{.*}}) // CHECK: call void @_ZN6NonPODC1ERKS_ -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}}) // CHECK: ret void CALL_CC(Basic) // Basic copy-constructor: // CHECK-LABEL: define linkonce_odr void @_ZN5BasicC2ERKS_(%struct.Basic* %this, %struct.Basic* dereferenceable({{[0-9]+}})) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}}) // CHECK: call void @_ZN6NonPODC1ERKS_ -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}i64 16, i32 4{{.*}}) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}} align 4 {{.*}} align 4 {{.*}}i64 16, i1 {{.*}}) // CHECK: ret void Index: test/CodeGenCXX/pr20897.cpp =================================================================== --- test/CodeGenCXX/pr20897.cpp +++ test/CodeGenCXX/pr20897.cpp @@ -9,7 +9,7 @@ // CHECK: %[[dest_a_gep:.*]] = getelementptr inbounds %struct.Derived, %struct.Derived* %[[this]], i32 0, i32 1 // CHECK-NEXT: %[[src_load:.*]] = load %struct.Derived*, %struct.Derived** {{.*}} // CHECK-NEXT: %[[src_a_gep:.*]] = getelementptr inbounds %struct.Derived, %struct.Derived* %[[src_load:.*]], i32 0, i32 1 -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[dest_a_gep]], i8* %[[src_a_gep]], i64 1, i32 4, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %[[dest_a_gep]], i8* align 4 %[[src_a_gep]], i64 1, i1 false) // CHECK-NEXT: %[[dest_this:.*]] = load %struct.Derived*, %struct.Derived** %[[retval]] // CHECK-NEXT: ret %struct.Derived* %[[dest_this]] bool a : 1; @@ -26,7 +26,7 @@ // CHECK-NEXT: %[[src_a_gep:.*]] = getelementptr inbounds %struct.Derived2, %struct.Derived2* %[[src_load:.*]], i32 0, i32 1 // CHECK-NEXT: %[[dest_a_bitcast:.*]] = bitcast [1 x i32]* %[[dest_a_gep]] to i8* // CHECK-NEXT: %[[src_a_bitcast:.*]] = bitcast [1 x i32]* %[[src_a_gep]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[dest_a_bitcast]], i8* %[[src_a_bitcast]], i32 4, i32 4, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %[[dest_a_bitcast]], i8* align 4 %[[src_a_bitcast]], i32 4, i1 false) // CHECK-NEXT: %[[dest_this:.*]] = load %struct.Derived2*, %struct.Derived2** %[[retval]] // CHECK-NEXT: ret %struct.Derived2* %[[dest_this]] int Array[1]; Index: test/CodeGenCXX/value-init.cpp =================================================================== --- test/CodeGenCXX/value-init.cpp +++ test/CodeGenCXX/value-init.cpp @@ -244,7 +244,7 @@ struct C : B { C(); }; C::C() : A(3), B() {} // CHECK-LABEL: define void @_ZN7PR111241CC1Ev - // CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 12, i32 8, i1 false) + // CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 {{.*}}, i8 0, i64 12, i1 false) // CHECK-NEXT: call void @_ZN7PR111241BC2Ev // Make sure C::C doesn't overwrite parts of A while it is zero-initializing B @@ -252,7 +252,7 @@ struct C2 : B2 { C2(); }; C2::C2() : A(3), B2() {} // CHECK-LABEL: define void @_ZN7PR111242C2C1Ev - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* {{.*}}, i64 16, i32 8, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %{{.*}}, i8* align 8 {{.*}}, i64 16, i1 false) // CHECK-NEXT: call void @_ZN7PR111242B2C2Ev } Index: test/CodeGenCXX/varargs.cpp =================================================================== --- test/CodeGenCXX/varargs.cpp +++ test/CodeGenCXX/varargs.cpp @@ -35,7 +35,7 @@ // CHECK-NEXT: [[TMP:%.*]] = alloca [[A]], align 4 // CHECK-NEXT: [[T0:%.*]] = bitcast [[A]]* [[TMP]] to i8* // CHECK-NEXT: [[T1:%.*]] = bitcast [[A]]* [[X]] to i8* - // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 8, i32 4, i1 false) + // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T0]], i8* align 4 [[T1]], i64 8, i1 false) // CHECK-NEXT: [[T0:%.*]] = bitcast [[A]]* [[TMP]] to i64* // CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[T0]], align 4 // CHECK-NEXT: call void (...) @_ZN5test13fooEz(i64 [[T1]]) Index: test/CodeGenObjC/arc-foreach.m =================================================================== --- test/CodeGenObjC/arc-foreach.m +++ test/CodeGenObjC/arc-foreach.m @@ -42,7 +42,7 @@ // Initialize the fast enumaration state. // CHECK-LP64-NEXT: [[T0:%.*]] = bitcast [[STATE_T]]* [[STATE]] to i8* -// CHECK-LP64-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 64, i32 8, i1 false) +// CHECK-LP64-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[T0]], i8 0, i64 64, i1 false) // Evaluate the collection expression and retain. // CHECK-LP64-NEXT: [[T0:%.*]] = load [[ARRAY_T]]*, [[ARRAY_T]]** [[ARRAY]], align 8 Index: test/CodeGenObjC/arc.m =================================================================== --- test/CodeGenObjC/arc.m +++ test/CodeGenObjC/arc.m @@ -507,7 +507,7 @@ // CHECK: [[X:%.*]] = alloca [5 x i8*], align 16 // CHECK: call void @llvm.lifetime.start // CHECK-NEXT: [[T0:%.*]] = bitcast [5 x i8*]* [[X]] to i8* - // CHECK: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 40, i32 16, i1 false) + // CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[T0]], i8 0, i64 40, i1 false) id x[5]; extern id test19_helper(void); @@ -556,7 +556,7 @@ // Zero-initialize. // CHECK-NEXT: [[T0:%.*]] = bitcast i8** [[VLA]] to i8* // CHECK-NEXT: [[T1:%.*]] = mul nuw i64 [[DIM]], 8 - // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 [[T1]], i32 16, i1 false) + // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 16 [[T0]], i8 0, i64 [[T1]], i1 false) // Destroy. // CHECK-NEXT: [[END:%.*]] = getelementptr inbounds i8*, i8** [[VLA]], i64 [[DIM]] @@ -599,7 +599,7 @@ // CHECK-NEXT: [[T0:%.*]] = bitcast [3 x i8*]* [[VLA]] to i8* // CHECK-NEXT: [[T1:%.*]] = mul nuw i64 2, [[DIM]] // CHECK-NEXT: [[T2:%.*]] = mul nuw i64 [[T1]], 24 - // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 [[T2]], i32 16, i1 false) + // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 16 [[T0]], i8 0, i64 [[T2]], i1 false) // Destroy. // CHECK-NEXT: [[T0:%.*]] = mul nuw i64 2, [[DIM]] Index: test/CodeGenObjC/builtin-memfns.m =================================================================== --- test/CodeGenObjC/builtin-memfns.m +++ test/CodeGenObjC/builtin-memfns.m @@ -5,6 +5,6 @@ // PR13697 void test1(int *a, id b) { // CHECK: @test1 - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i32 1, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false) memcpy(a, b, 8); } Index: test/CodeGenObjC/messages-2.m =================================================================== --- test/CodeGenObjC/messages-2.m +++ test/CodeGenObjC/messages-2.m @@ -157,7 +157,7 @@ // CHECK: call {{.*}} @objc_msgSend_stret to // CHECK-NEXT: br label // CHECK: [[T0:%.*]] = bitcast [[POINT_T]]* [[POINT]] to i8* - // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 48, i32 4, i1 false) + // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[T0]], i8 0, i64 48, i1 false) // CHECK-NEXT: br label // CHECK-NF: [[X:%.*]] = alloca [[A]]* @@ -169,7 +169,7 @@ // CHECK-NF: call {{.*}} @objc_msgSend_stret to // CHECK-NF-NEXT: br label // CHECK-NF: [[T0:%.*]] = bitcast [[POINT_T]]* [[POINT]] to i8* - // CHECK-NF-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 48, i32 4, i1 false) + // CHECK-NF-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[T0]], i8 0, i64 48, i1 false) // CHECK-NF-NEXT: br label MyPoint point = [x returnAPoint]; } Index: test/CodeGenObjC/stret-1.m =================================================================== --- test/CodeGenObjC/stret-1.m +++ test/CodeGenObjC/stret-1.m @@ -16,7 +16,7 @@ s = [(id)(argc&~255) method]; // CHECK: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (%struct.stret*, i8*, i8*)*)(%struct.stret* sret [[T0:%[^,]+]] // CHECK: [[T0P:%.*]] = bitcast %struct.stret* [[T0]] to i8* - // CHECK: call void @llvm.memset.p0i8.i64(i8* [[T0P]], i8 0, i64 400, i32 4, i1 false) + // CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 [[T0P]], i8 0, i64 400, i1 false) s = [Test method]; // CHECK: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (%struct.stret*, i8*, i8*)*)(%struct.stret* sret [[T1:%[^,]+]] Index: test/CodeGenObjCXX/arc-exceptions.mm =================================================================== --- test/CodeGenObjCXX/arc-exceptions.mm +++ test/CodeGenObjCXX/arc-exceptions.mm @@ -101,7 +101,7 @@ // Construct array. // CHECK-NEXT: [[ARRAY:%.*]] = getelementptr inbounds [[A]], [[A]]* [[THIS]], i32 0, i32 1 // CHECK-NEXT: [[T0:%.*]] = bitcast [2 x [3 x i8*]]* [[ARRAY]] to i8* - // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 48, i32 8, i1 false) + // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[T0]], i8 0, i64 48, i1 false) // throw 0; // CHECK: invoke void @__cxa_throw( // Landing pad from throw site: Index: test/CodeGenOpenCL/amdgcn-automatic-variable.cl =================================================================== --- test/CodeGenOpenCL/amdgcn-automatic-variable.cl +++ test/CodeGenOpenCL/amdgcn-automatic-variable.cl @@ -62,7 +62,7 @@ // CHECK-LABEL: define void @func3() // CHECK: %a = alloca [16 x [1 x float]], align 4, addrspace(5) // CHECK: %[[CAST:.+]] = bitcast [16 x [1 x float]] addrspace(5)* %a to i8 addrspace(5)* -// CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* %[[CAST]], i8 0, i64 64, i32 4, i1 false) +// CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 4 %[[CAST]], i8 0, i64 64, i1 false) void func3(void) { float a[16][1] = {{0.}}; } Index: test/CodeGenOpenCL/amdgpu-nullptr.cl =================================================================== --- test/CodeGenOpenCL/amdgpu-nullptr.cl +++ test/CodeGenOpenCL/amdgpu-nullptr.cl @@ -143,9 +143,9 @@ // NOOPT: store i8* null, i8** %sp3, align 4 // NOOPT: store i8* null, i8** %sp4, align 4 // NOOPT: %[[SS1:.*]] = bitcast %struct.StructTy1* %SS1 to i8* -// NOOPT: call void @llvm.memcpy.p0i8.p2i8.i64(i8* %[[SS1]], i8 addrspace(2)* bitcast (%struct.StructTy1 addrspace(2)* @test_func_scope_var_private.SS1 to i8 addrspace(2)*), i64 32, i32 8, i1 false) +// NOOPT: call void @llvm.memcpy.p0i8.p2i8.i64(i8* align 8 %[[SS1]], i8 addrspace(2)* align 8 bitcast (%struct.StructTy1 addrspace(2)* @test_func_scope_var_private.SS1 to i8 addrspace(2)*), i64 32, i1 false) // NOOPT: %[[SS2:.*]] = bitcast %struct.StructTy2* %SS2 to i8* -// NOOPT: call void @llvm.memset.p0i8.i64(i8* %[[SS2]], i8 0, i64 24, i32 8, i1 false) +// NOOPT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[SS2]], i8 0, i64 24, i1 false) void test_func_scope_var_private(void) { private char *sp1 = 0; private char *sp2 = NULL; @@ -163,9 +163,9 @@ // NOOPT: store i8 addrspace(3)* null, i8 addrspace(3)** %sp3, align 4 // NOOPT: store i8 addrspace(3)* null, i8 addrspace(3)** %sp4, align 4 // NOOPT: %[[SS1:.*]] = bitcast %struct.StructTy1* %SS1 to i8* -// NOOPT: call void @llvm.memcpy.p0i8.p2i8.i64(i8* %[[SS1]], i8 addrspace(2)* bitcast (%struct.StructTy1 addrspace(2)* @test_func_scope_var_local.SS1 to i8 addrspace(2)*), i64 32, i32 8, i1 false) +// NOOPT: call void @llvm.memcpy.p0i8.p2i8.i64(i8* align 8 %[[SS1]], i8 addrspace(2)* align 8 bitcast (%struct.StructTy1 addrspace(2)* @test_func_scope_var_local.SS1 to i8 addrspace(2)*), i64 32, i1 false) // NOOPT: %[[SS2:.*]] = bitcast %struct.StructTy2* %SS2 to i8* -// NOOPT: call void @llvm.memset.p0i8.i64(i8* %[[SS2]], i8 0, i64 24, i32 8, i1 false) +// NOOPT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[SS2]], i8 0, i64 24, i1 false) void test_func_scope_var_local(void) { local char *sp1 = 0; local char *sp2 = NULL; @@ -510,7 +510,7 @@ } StructTy3; // CHECK-LABEL: test_memset_private -// CHECK: call void @llvm.memset.p0i8.i64(i8* nonnull {{.*}}, i8 0, i64 40, i32 8, i1 false) +// CHECK: call void @llvm.memset.p0i8.i64(i8* nonnull align 8 {{.*}}, i8 0, i64 40, i1 false) void test_memset_private(private StructTy3 *ptr) { StructTy3 S3 = {0, 0, 0, 0, 0}; *ptr = S3; Index: test/CodeGenOpenCL/partial_initializer.cl =================================================================== --- test/CodeGenOpenCL/partial_initializer.cl +++ test/CodeGenOpenCL/partial_initializer.cl @@ -36,7 +36,7 @@ // CHECK: %[[V2:.*]] = alloca <4 x i32>, align 16 // CHECK: %[[v0:.*]] = bitcast [6 x [6 x float]]* %A to i8* - // CHECK: call void @llvm.memset.p0i8.i32(i8* %[[v0]], i8 0, i32 144, i32 4, i1 false) + // CHECK: call void @llvm.memset.p0i8.i32(i8* align 4 %[[v0]], i8 0, i32 144, i1 false) // CHECK: %[[v1:.*]] = bitcast i8* %[[v0]] to [6 x [6 x float]]* // CHECK: %[[v2:.*]] = getelementptr [6 x [6 x float]], [6 x [6 x float]]* %[[v1]], i32 0, i32 0 // CHECK: %[[v3:.*]] = getelementptr [6 x float], [6 x float]* %[[v2]], i32 0, i32 0 @@ -46,7 +46,7 @@ float A[6][6] = {1.0f, 2.0f}; // CHECK: %[[v5:.*]] = bitcast %struct.StrucTy* %S to i8* - // CHECK: call void @llvm.memcpy.p0i8.p2i8.i32(i8* %[[v5]], i8 addrspace(2)* bitcast (%struct.StrucTy addrspace(2)* @f.S to i8 addrspace(2)*), i32 12, i32 4, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %[[v5]], i8 addrspace(2)* align 4 bitcast (%struct.StrucTy addrspace(2)* @f.S to i8 addrspace(2)*), i32 12, i1 false) StrucTy S = {1, 2}; // CHECK: store <2 x i32> , <2 x i32>* %[[compoundliteral1]], align 8 Index: test/CodeGenOpenCL/private-array-initialization.cl =================================================================== --- test/CodeGenOpenCL/private-array-initialization.cl +++ test/CodeGenOpenCL/private-array-initialization.cl @@ -5,5 +5,5 @@ void test() { __private int arr[] = {1, 2, 3}; // CHECK: %[[arr_i8_ptr:[0-9]+]] = bitcast [3 x i32]* %arr to i8* -// CHECK: call void @llvm.memcpy.p0i8.p2i8.i32(i8* %[[arr_i8_ptr]], i8 addrspace(2)* bitcast ([3 x i32] addrspace(2)* @test.arr to i8 addrspace(2)*), i32 12, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %[[arr_i8_ptr]], i8 addrspace(2)* align 4 bitcast ([3 x i32] addrspace(2)* @test.arr to i8 addrspace(2)*), i32 12, i1 false) } Index: test/Modules/templates.mm =================================================================== --- test/Modules/templates.mm +++ test/Modules/templates.mm @@ -77,10 +77,10 @@ // CHECK: %[[l:.*]] = alloca %[[ListInt:[^ ]*]], align 8 // CHECK: %[[r:.*]] = alloca %[[ListInt]], align 8 - // CHECK: call {{.*}}memcpy{{.*}}(i8* %{{.*}}, i8* bitcast ({{.*}}* @_ZZ15testMixedStructvE1l to i8*), i64 16, + // CHECK: call {{.*}}memcpy{{.*}}(i8* align {{[0-9]+}} %{{.*}}, i8* align {{[0-9]+}} bitcast ({{.*}}* @_ZZ15testMixedStructvE1l to i8*), i64 16, ListInt_left l{0, 1}; - // CHECK: call {{.*}}memcpy{{.*}}(i8* %{{.*}}, i8* bitcast ({{.*}}* @_ZZ15testMixedStructvE1r to i8*), i64 16, + // CHECK: call {{.*}}memcpy{{.*}}(i8* align {{[0-9]+}} %{{.*}}, i8* align {{[0-9]+}} bitcast ({{.*}}* @_ZZ15testMixedStructvE1r to i8*), i64 16, ListInt_right r{0, 2}; // CHECK: call void @_Z10useListIntR4ListIiE(%[[ListInt]]* dereferenceable({{[0-9]+}}) %[[l]]) Index: test/OpenMP/atomic_write_codegen.c =================================================================== --- test/OpenMP/atomic_write_codegen.c +++ test/OpenMP/atomic_write_codegen.c @@ -142,7 +142,7 @@ dx = dv; // CHECK: [[LD:%.+]] = load x86_fp80, x86_fp80* // CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[LDTEMP:%.*]] to i8* -// CHECK: call void @llvm.memset.p0i8.i64(i8* [[BITCAST]], i8 0, i64 16, i32 16, i1 false) +// CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[BITCAST]], i8 0, i64 16, i1 false) // CHECK: store x86_fp80 [[LD]], x86_fp80* [[LDTEMP]] // CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[LDTEMP:%.*]] to i128* // CHECK: [[LD:%.+]] = load i128, i128* [[BITCAST]] @@ -231,7 +231,7 @@ // CHECK: load i64, i64* // CHECK: [[VAL:%.+]] = uitofp i64 %{{.+}} to x86_fp80 // CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[TEMP:%.+]] to i8* -// CHECK: call void @llvm.memset.p0i8.i64(i8* [[BITCAST]], i8 0, i64 16, i32 16, i1 false) +// CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 [[BITCAST]], i8 0, i64 16, i1 false) // CHECK: store x86_fp80 [[VAL]], x86_fp80* [[TEMP]] // CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[TEMP]] to i128* // CHECK: [[VAL:%.+]] = load i128, i128* [[BITCAST]] Index: test/OpenMP/distribute_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/distribute_firstprivate_codegen.cpp +++ test/OpenMP/distribute_firstprivate_codegen.cpp @@ -255,7 +255,7 @@ // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], // CHECK-DAG: [[VEC_ADDR_VAL_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i{{[0-9]+}}* // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i{{[0-9]+}}* -// CHECK-DAG: call void @llvm.memcpy.{{.*}}(i{{[0-9]+}}* [[VEC_PRIV_BCAST]], i{{[0-9]+}}* [[VEC_ADDR_VAL_BCAST]],{{.+}}) +// CHECK-DAG: call void @llvm.memcpy.{{.*}}(i{{[0-9]+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], i{{[0-9]+}}* align {{[0-9]+}} [[VEC_ADDR_VAL_BCAST]],{{.+}}) // init s_arr // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], @@ -270,7 +270,7 @@ // CHECK-DAG: [[S_ARR_DST_PAST:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]],{{.+}} ], [ [[S_ARR_DST:%.+]],{{.+}} ] // CHECK-DAG: [[S_ARR_SRC_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_SRC_PAST]] to{{.+}} // CHECK-DAG: [[S_ARR_DST_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_DST_PAST]] to{{.+}} -// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* [[S_ARR_DST_BCAST]], {{.+}}* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[S_ARR_DST_BCAST]], {{.+}}* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK-DAG: [[S_ARR_SRC]] = getelementptr{{.+}} // CHECK-DAG: [[S_ARR_DST]] = getelementptr{{.+}} // CHECK-DAG: [[S_ARR_CPY_FIN:%.+]] = icmp{{.+}} [[S_ARR_DST]], [[S_ARR_PRIV_NEXT]] @@ -283,7 +283,7 @@ // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]], // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_PRIV]] to{{.+}} // CHECK-DAG: [[TMP_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to{{.+}} -// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_BCAST]],{{.+}}) +// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_BCAST]],{{.+}}) // CHECK-DAG: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]], // init svar @@ -350,7 +350,7 @@ // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], // CHECK-DAG: [[VEC_ADDR_VAL_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i{{[0-9]+}}* // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i{{[0-9]+}}* -// CHECK-DAG: call void @llvm.memcpy.{{.*}}(i{{[0-9]+}}* [[VEC_PRIV_BCAST]], i{{[0-9]+}}* [[VEC_ADDR_VAL_BCAST]],{{.+}}) +// CHECK-DAG: call void @llvm.memcpy.{{.*}}(i{{[0-9]+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], i{{[0-9]+}}* align {{[0-9]+}} [[VEC_ADDR_VAL_BCAST]],{{.+}}) // init s_arr // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], @@ -365,7 +365,7 @@ // CHECK-DAG: [[S_ARR_DST_PAST:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]],{{.+}} ], [ [[S_ARR_DST:%.+]],{{.+}} ] // CHECK-DAG: [[S_ARR_SRC_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_SRC_PAST]] to{{.+}} // CHECK-DAG: [[S_ARR_DST_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_DST_PAST]] to{{.+}} -// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* [[S_ARR_DST_BCAST]], {{.+}}* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[S_ARR_DST_BCAST]], {{.+}}* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK-DAG: [[S_ARR_SRC]] = getelementptr{{.+}} // CHECK-DAG: [[S_ARR_DST]] = getelementptr{{.+}} // CHECK-DAG: [[S_ARR_CPY_FIN:%.+]] = icmp{{.+}} [[S_ARR_DST]], [[S_ARR_PRIV_NEXT]] @@ -378,7 +378,7 @@ // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP]], // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_PRIV]] to{{.+}} // CHECK-DAG: [[TMP_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to{{.+}} -// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_BCAST]],{{.+}}) +// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_BCAST]],{{.+}}) // CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP_PRIV]], // CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* %.omp{{.+}}, Index: test/OpenMP/distribute_lastprivate_codegen.cpp =================================================================== --- test/OpenMP/distribute_lastprivate_codegen.cpp +++ test/OpenMP/distribute_lastprivate_codegen.cpp @@ -256,7 +256,7 @@ // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]], // CHECK: call void @__kmpc_for_static_fini( // lastprivates @@ -269,7 +269,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -280,7 +280,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -289,7 +289,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], // CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], // CHECK: ret void @@ -358,7 +358,7 @@ // CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST1]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST1]], // CHECK: call void @__kmpc_for_static_fini( // lastprivates @@ -371,7 +371,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -382,7 +382,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -391,6 +391,6 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: ret void #endif Index: test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp +++ test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp @@ -328,7 +328,7 @@ // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]], // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to // CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to -// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]], +// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VEC_ADDR_BCAST]], // s_arr // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]], @@ -345,7 +345,7 @@ // CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]], // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to // CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to -// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_REF_BCAST]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_REF_BCAST]], // CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]], // svar @@ -421,7 +421,7 @@ // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]], // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to // CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to -// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]], +// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VEC_ADDR_BCAST]], // s_arr // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]], @@ -438,7 +438,7 @@ // CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]], // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to // CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to -// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[VAR_ADDR_BCAST]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VAR_ADDR_BCAST]], // CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]], // CHECK: call void @__kmpc_for_static_init_4( @@ -510,7 +510,7 @@ // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]], // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to // CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to -// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]], +// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VEC_ADDR_BCAST]], // s_arr // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]], @@ -527,7 +527,7 @@ // CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]], // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to // CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to -// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_REF_BCAST]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_REF_BCAST]], // CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]], // CHECK: call void @__kmpc_for_static_init_4( @@ -591,7 +591,7 @@ // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]], // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to // CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to -// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]], +// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VEC_ADDR_BCAST]], // s_arr // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]], @@ -608,7 +608,7 @@ // CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]], // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to // CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to -// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[VAR_ADDR_BCAST]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VAR_ADDR_BCAST]], // CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]], // CHECK: call void @__kmpc_for_static_init_4( Index: test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp =================================================================== --- test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp +++ test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp @@ -349,7 +349,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -360,7 +360,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -369,7 +369,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], // CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], // CHECK: ret void @@ -441,7 +441,7 @@ // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]], // CHECK: call void @__kmpc_for_static_fini( @@ -455,7 +455,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -466,7 +466,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -475,7 +475,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], // CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], // CHECK: ret void @@ -549,7 +549,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]] to [[S_INT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -560,7 +560,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -569,7 +569,7 @@ // CHECK: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_REF]] to i8* // CHECK: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],{{.+}}) // CHECK: ret void // outlined function for 'parallel for' @@ -635,7 +635,7 @@ // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]], // CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to i8* -// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]], // CHECK: call void @__kmpc_for_static_fini( @@ -649,7 +649,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]] to [[S_INT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -660,7 +660,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -669,7 +669,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: ret void #endif Index: test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp +++ test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -326,7 +326,7 @@ // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]], // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to // CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to -// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]], +// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VEC_ADDR_BCAST]], // s_arr // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]], @@ -343,7 +343,7 @@ // CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]], // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to // CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to -// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_REF_BCAST]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_REF_BCAST]], // CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]], // svar @@ -419,7 +419,7 @@ // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]], // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to // CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to -// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]], +// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VEC_ADDR_BCAST]], // s_arr // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]], @@ -436,7 +436,7 @@ // CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]], // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to // CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to -// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[VAR_ADDR_BCAST]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VAR_ADDR_BCAST]], // CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]], // CHECK: call void @__kmpc_for_static_init_4( @@ -508,7 +508,7 @@ // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]], // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to // CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to -// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]], +// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VEC_ADDR_BCAST]], // s_arr // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]], @@ -525,7 +525,7 @@ // CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]], // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to // CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to -// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_REF_BCAST]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_REF_BCAST]], // CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]], // CHECK: call void @__kmpc_for_static_init_4( @@ -589,7 +589,7 @@ // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]], // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to // CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to -// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]], +// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VEC_ADDR_BCAST]], // s_arr // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]], @@ -606,7 +606,7 @@ // CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]], // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to // CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to -// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[VAR_ADDR_BCAST]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VAR_ADDR_BCAST]], // CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]], // CHECK: call void @__kmpc_for_static_init_4( Index: test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp =================================================================== --- test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp +++ test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -359,7 +359,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -370,7 +370,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -379,7 +379,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], // CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], // CHECK: ret void @@ -451,7 +451,7 @@ // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]], // CHECK: call void @__kmpc_for_static_fini( @@ -465,7 +465,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -476,7 +476,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -485,7 +485,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], // CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], // CHECK: ret void @@ -559,7 +559,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]] to [[S_INT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -570,7 +570,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -579,7 +579,7 @@ // CHECK: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_REF]] to i8* // CHECK: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],{{.+}}) // CHECK: ret void // outlined function for 'parallel for' @@ -645,7 +645,7 @@ // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]], // CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to i8* -// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]], // CHECK: call void @__kmpc_for_static_fini( @@ -659,7 +659,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]] to [[S_INT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -670,7 +670,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -679,7 +679,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: ret void // CHECK: !{!"llvm.loop.vectorize.enable", i1 true} Index: test/OpenMP/distribute_simd_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/distribute_simd_firstprivate_codegen.cpp +++ test/OpenMP/distribute_simd_firstprivate_codegen.cpp @@ -257,7 +257,7 @@ // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], // CHECK-DAG: [[VEC_ADDR_VAL_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i{{[0-9]+}}* // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i{{[0-9]+}}* -// CHECK-DAG: call void @llvm.memcpy.{{.*}}(i{{[0-9]+}}* [[VEC_PRIV_BCAST]], i{{[0-9]+}}* [[VEC_ADDR_VAL_BCAST]],{{.+}}) +// CHECK-DAG: call void @llvm.memcpy.{{.*}}(i{{[0-9]+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], i{{[0-9]+}}* align {{[0-9]+}} [[VEC_ADDR_VAL_BCAST]],{{.+}}) // init s_arr // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], @@ -272,7 +272,7 @@ // CHECK-DAG: [[S_ARR_DST_PAST:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]],{{.+}} ], [ [[S_ARR_DST:%.+]],{{.+}} ] // CHECK-DAG: [[S_ARR_SRC_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_SRC_PAST]] to{{.+}} // CHECK-DAG: [[S_ARR_DST_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_DST_PAST]] to{{.+}} -// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* [[S_ARR_DST_BCAST]], {{.+}}* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[S_ARR_DST_BCAST]], {{.+}}* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK-DAG: [[S_ARR_SRC]] = getelementptr{{.+}} // CHECK-DAG: [[S_ARR_DST]] = getelementptr{{.+}} // CHECK-DAG: [[S_ARR_CPY_FIN:%.+]] = icmp{{.+}} [[S_ARR_DST]], [[S_ARR_PRIV_NEXT]] @@ -285,7 +285,7 @@ // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]], // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_PRIV]] to{{.+}} // CHECK-DAG: [[TMP_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to{{.+}} -// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_BCAST]],{{.+}}) +// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_BCAST]],{{.+}}) // CHECK-DAG: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]], // init svar @@ -352,7 +352,7 @@ // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], // CHECK-DAG: [[VEC_ADDR_VAL_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i{{[0-9]+}}* // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i{{[0-9]+}}* -// CHECK-DAG: call void @llvm.memcpy.{{.*}}(i{{[0-9]+}}* [[VEC_PRIV_BCAST]], i{{[0-9]+}}* [[VEC_ADDR_VAL_BCAST]],{{.+}}) +// CHECK-DAG: call void @llvm.memcpy.{{.*}}(i{{[0-9]+}}* align {{[0-9]+}} [[VEC_PRIV_BCAST]], i{{[0-9]+}}* align {{[0-9]+}} [[VEC_ADDR_VAL_BCAST]],{{.+}}) // init s_arr // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], @@ -367,7 +367,7 @@ // CHECK-DAG: [[S_ARR_DST_PAST:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]],{{.+}} ], [ [[S_ARR_DST:%.+]],{{.+}} ] // CHECK-DAG: [[S_ARR_SRC_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_SRC_PAST]] to{{.+}} // CHECK-DAG: [[S_ARR_DST_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_DST_PAST]] to{{.+}} -// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* [[S_ARR_DST_BCAST]], {{.+}}* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[S_ARR_DST_BCAST]], {{.+}}* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK-DAG: [[S_ARR_SRC]] = getelementptr{{.+}} // CHECK-DAG: [[S_ARR_DST]] = getelementptr{{.+}} // CHECK-DAG: [[S_ARR_CPY_FIN:%.+]] = icmp{{.+}} [[S_ARR_DST]], [[S_ARR_PRIV_NEXT]] @@ -380,7 +380,7 @@ // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP]], // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_PRIV]] to{{.+}} // CHECK-DAG: [[TMP_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to{{.+}} -// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_BCAST]],{{.+}}) +// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_BCAST]],{{.+}}) // CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP_PRIV]], // CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* %.omp{{.+}}, Index: test/OpenMP/distribute_simd_lastprivate_codegen.cpp =================================================================== --- test/OpenMP/distribute_simd_lastprivate_codegen.cpp +++ test/OpenMP/distribute_simd_lastprivate_codegen.cpp @@ -267,7 +267,7 @@ // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]], // CHECK: call void @__kmpc_for_static_fini( // lastprivates @@ -280,7 +280,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -291,7 +291,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -300,7 +300,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], // CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], // CHECK: ret void @@ -369,7 +369,7 @@ // CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST1]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST1]], // CHECK: call void @__kmpc_for_static_fini( // lastprivates @@ -382,7 +382,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -393,7 +393,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -402,7 +402,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: ret void // CHECK: !{!"llvm.loop.vectorize.enable", i1 true} Index: test/OpenMP/for_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/for_firstprivate_codegen.cpp +++ test/OpenMP/for_firstprivate_codegen.cpp @@ -241,7 +241,7 @@ // firstprivate vec(vec) // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), // firstprivate s_arr(s_arr) // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 @@ -309,7 +309,7 @@ // firstprivate vec(vec) // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]], // firstprivate s_arr(s_arr) // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 Index: test/OpenMP/for_lastprivate_codegen.cpp =================================================================== --- test/OpenMP/for_lastprivate_codegen.cpp +++ test/OpenMP/for_lastprivate_codegen.cpp @@ -523,7 +523,7 @@ // original vec[]=private_vec[]; // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]], // original s_arr[]=private_s_arr[]; // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 @@ -752,7 +752,7 @@ // original vec[]=private_vec[]; // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]], // original s_arr[]=private_s_arr[]; // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 Index: test/OpenMP/for_reduction_codegen.cpp =================================================================== --- test/OpenMP/for_reduction_codegen.cpp +++ test/OpenMP/for_reduction_codegen.cpp @@ -352,7 +352,7 @@ // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_REF]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_PRIV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // var1 = var1.operator &&(var1_reduction); // CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_REF]]) @@ -368,7 +368,7 @@ // CHECK: call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // t_var1 = min(t_var1, t_var1_reduction); // CHECK: [[T_VAR1_VAL:%.+]] = load float, float* [[T_VAR1_REF]], @@ -406,7 +406,7 @@ // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_REF]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_PRIV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // CHECK: call void @__kmpc_end_critical( // var1 = var1.operator &&(var1_reduction); @@ -424,7 +424,7 @@ // CHECK: call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // CHECK: call void @__kmpc_end_critical( // t_var1 = min(t_var1, t_var1_reduction); @@ -510,7 +510,7 @@ // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_LHS]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_RHS]]) // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_LHS]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // var1_lhs = var1_lhs.operator &&(var1_rhs); // CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_LHS]]) @@ -526,7 +526,7 @@ // CHECK: call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_LHS]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // t_var1_lhs = min(t_var1_lhs, t_var1_rhs); // CHECK: [[T_VAR1_LHS_VAL:%.+]] = load float, float* [[T_VAR1_LHS]], @@ -629,7 +629,7 @@ // CHECK: phi [[S_FLOAT_TY]]* // CHECK: [[AND:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @_ZN1SIfEanERKS0_([[S_FLOAT_TY]]* %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}) // CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[AND]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* [[BITCAST]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.+}}, i8* align 4 [[BITCAST]], i64 4, i1 false) // CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]] // CHECK: br i1 [[DONE]], @@ -659,7 +659,7 @@ // CHECK: call void @__kmpc_critical( // CHECK: [[AND:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @_ZN1SIfEanERKS0_([[S_FLOAT_TY]]* %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}) // CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[AND]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* [[BITCAST]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.+}}, i8* align 4 [[BITCAST]], i64 4, i1 false) // CHECK: call void @__kmpc_end_critical( // CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]] // CHECK: br i1 [[DONE]], @@ -732,7 +732,7 @@ // CHECK: phi [[S_FLOAT_TY]]* // CHECK: [[AND:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @_ZN1SIfEanERKS0_([[S_FLOAT_TY]]* %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}) // CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[AND]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* [[BITCAST]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.+}}, i8* align 4 [[BITCAST]], i64 4, i1 false) // CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]] // CHECK: br i1 [[DONE]], @@ -823,7 +823,7 @@ // CHECK: phi [[S_FLOAT_TY]]* // CHECK: [[AND:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @_ZN1SIfEanERKS0_([[S_FLOAT_TY]]* %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}) // CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[AND]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* [[BITCAST]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.+}}, i8* align 4 [[BITCAST]], i64 4, i1 false) // CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]] // CHECK: br i1 [[DONE]], @@ -853,7 +853,7 @@ // CHECK: call void @__kmpc_critical( // CHECK: [[AND:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @_ZN1SIfEanERKS0_([[S_FLOAT_TY]]* %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}) // CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[AND]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* [[BITCAST]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.+}}, i8* align 4 [[BITCAST]], i64 4, i1 false) // CHECK: call void @__kmpc_end_critical( // CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]] // CHECK: br i1 [[DONE]], @@ -922,7 +922,7 @@ // CHECK: phi [[S_FLOAT_TY]]* // CHECK: [[AND:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @_ZN1SIfEanERKS0_([[S_FLOAT_TY]]* %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}) // CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[AND]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* [[BITCAST]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.+}}, i8* align 4 [[BITCAST]], i64 4, i1 false) // CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]] // CHECK: br i1 [[DONE]], @@ -1292,7 +1292,7 @@ // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]* dereferenceable(4) [[VAR_PRIV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // var1 = var1.operator &&(var1_reduction); // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_REF]]) @@ -1308,7 +1308,7 @@ // CHECK: call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // t_var1 = min(t_var1, t_var1_reduction); // CHECK: [[T_VAR1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_REF]], @@ -1334,7 +1334,7 @@ // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]* dereferenceable(4) [[VAR_PRIV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // CHECK: call void @__kmpc_end_critical( // var1 = var1.operator &&(var1_reduction); @@ -1352,7 +1352,7 @@ // CHECK: call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // CHECK: call void @__kmpc_end_critical( // t_var1 = min(t_var1, t_var1_reduction); @@ -1419,7 +1419,7 @@ // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_LHS]], [[S_INT_TY]]* dereferenceable(4) [[VAR_RHS]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_LHS]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // var1_lhs = var1_lhs.operator &&(var1_rhs); // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_LHS]]) @@ -1435,7 +1435,7 @@ // CHECK: call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_LHS]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // t_var1_lhs = min(t_var1_lhs, t_var1_rhs); // CHECK: [[T_VAR1_LHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_LHS]], Index: test/OpenMP/nvptx_target_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/nvptx_target_firstprivate_codegen.cpp +++ test/OpenMP/nvptx_target_firstprivate_codegen.cpp @@ -72,17 +72,17 @@ // firstprivate(b): memcpy(b_priv,b_in) // TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x float]* [[B_PRIV]] to i8* // TCHECK: [[B_ADDR_REF_BCAST:%.+]] = bitcast [10 x float]* [[B_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_ADDR_REF_BCAST]], {{.+}}) + // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_ADDR_REF_BCAST]], {{.+}}) // firstprivate(c) // TCHECK: [[C_PRIV_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_PRIV]] to i8* // TCHECK: [[C_IN_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[C_PRIV_BCAST]], i8* [[C_IN_BCAST]],{{.+}}) + // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[C_PRIV_BCAST]], i8* align {{[0-9]+}} [[C_IN_BCAST]],{{.+}}) // firstprivate(d) // TCHECK: [[D_PRIV_BCAST:%.+]] = bitcast [[TT]]* [[D_PRIV]] to i8* // TCHECK: [[D_IN_BCAST:%.+]] = bitcast [[TT]]* [[D_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[D_PRIV_BCAST]], i8* [[D_IN_BCAST]],{{.+}}) + // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[D_PRIV_BCAST]], i8* align {{[0-9]+}} [[D_IN_BCAST]],{{.+}}) // TCHECK: load i16, i16* [[A2_ADDR]], @@ -151,7 +151,7 @@ // firstprivate(b) // TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8* // TCHECK: [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8* -// TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_IN_BCAST]],{{.+}}) +// TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_IN_BCAST]],{{.+}}) // TCHECK: ret void @@ -214,7 +214,7 @@ // firstprivate(b) // TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8* // TCHECK: [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8* -// TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_IN_BCAST]],{{.+}}) +// TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_IN_BCAST]],{{.+}}) // TCHECK: ret void Index: test/OpenMP/ordered_doacross_codegen.cpp =================================================================== --- test/OpenMP/ordered_doacross_codegen.cpp +++ test/OpenMP/ordered_doacross_codegen.cpp @@ -24,7 +24,7 @@ // CHECK: icmp // CHECK-NEXT: br i1 % // CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIMS]] to i8* -// CHECK: call void @llvm.memset.p0i8.i64(i8* [[CAST]], i8 0, i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 [[CAST]], i8 0, i64 24, i1 false) // CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIMS]], i32 0, i32 1 // CHECK: store i64 %{{.+}}, i64* % // CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIMS]], i32 0, i32 2 @@ -81,7 +81,7 @@ // CHECK: icmp // CHECK-NEXT: br i1 % // CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIMS]] to i8* -// CHECK: call void @llvm.memset.p0i8.i64(i8* [[CAST]], i8 0, i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 [[CAST]], i8 0, i64 24, i1 false) // CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIMS]], i32 0, i32 1 // CHECK: store i64 %{{.+}}, i64* % // CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIMS]], i32 0, i32 2 Index: test/OpenMP/parallel_codegen.cpp =================================================================== --- test/OpenMP/parallel_codegen.cpp +++ test/OpenMP/parallel_codegen.cpp @@ -47,7 +47,7 @@ // CHECK-DEBUG: [[LOC_2_ADDR:%.+]] = alloca %ident_t // CHECK-DEBUG: [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %ident_t* [[LOC_2_ADDR]] to i8* // CHECK-DEBUG-NEXT: [[KMPC_DEFAULT_LOC_VOIDPTR:%.+]] = bitcast %ident_t* [[DEF_LOC_2]] to i8* -// CHECK-DEBUG-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[KMPC_LOC_VOIDPTR]], i8* [[KMPC_DEFAULT_LOC_VOIDPTR]], i64 24, i32 8, i1 false) +// CHECK-DEBUG-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[KMPC_LOC_VOIDPTR]], i8* align 8 [[KMPC_DEFAULT_LOC_VOIDPTR]], i64 24, i1 false) // CHECK-DEBUG: store i32 %argc, i32* [[ARGC_ADDR:%.+]], // CHECK-DEBUG: [[VLA:%.+]] = alloca i32, i64 [[VLA_SIZE:%[^,]+]], // CHECK-DEBUG: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %ident_t, %ident_t* [[LOC_2_ADDR]], i32 0, i32 4 @@ -97,7 +97,7 @@ // CHECK-DEBUG-DAG: [[LOC_2_ADDR:%.+]] = alloca %ident_t // CHECK-DEBUG: [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %ident_t* [[LOC_2_ADDR]] to i8* // CHECK-DEBUG-NEXT: [[KMPC_DEFAULT_LOC_VOIDPTR:%.+]] = bitcast %ident_t* [[DEF_LOC_2]] to i8* -// CHECK-DEBUG-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[KMPC_LOC_VOIDPTR]], i8* [[KMPC_DEFAULT_LOC_VOIDPTR]], i64 24, i32 8, i1 false) +// CHECK-DEBUG-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[KMPC_LOC_VOIDPTR]], i8* align 8 [[KMPC_DEFAULT_LOC_VOIDPTR]], i64 24, i1 false) // CHECK-DEBUG-NEXT: store i8** %argc, i8*** [[ARGC_ADDR:%.+]], // CHECK-DEBUG: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %ident_t, %ident_t* [[LOC_2_ADDR]], i32 0, i32 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC2]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]] Index: test/OpenMP/parallel_copyin_codegen.cpp =================================================================== --- test/OpenMP/parallel_copyin_codegen.cpp +++ test/OpenMP/parallel_copyin_codegen.cpp @@ -294,10 +294,10 @@ // threadprivate_vec = vec; // CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[VEC]] -// CHECK: call void @llvm.memcpy{{.*}}(i8* %{{.+}}, i8* bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), +// CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %{{.+}}, i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), // TLS-CHECK: [[MASTER_CAST:%.+]] = bitcast [2 x i32]* [[MASTER_REF2]] to i8* -// TLS-CHECK: call void @llvm.memcpy{{.*}}(i8* bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), i8* [[MASTER_CAST]] +// TLS-CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), i8* align {{[0-9]+}} [[MASTER_CAST]] // threadprivate_s_arr = s_arr; // CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[S_ARR]] @@ -415,10 +415,10 @@ // threadprivate_vec = vec; // CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VEC]] -// CHECK: call {{.*}}void @llvm.memcpy{{.*}}(i8* %{{.+}}, i8* bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*), +// CHECK: call {{.*}}void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %{{.+}}, i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*), // TLS-CHECK: [[MASTER_CAST:%.+]] = bitcast [2 x i32]* [[MASTER_REF1]] to i8* -// TLS-CHECK: call void @llvm.memcpy{{.*}}(i8* bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*), i8* [[MASTER_CAST]] +// TLS-CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*), i8* align {{[0-9]+}} [[MASTER_CAST]] // threadprivate_s_arr = s_arr; // CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_S_ARR]] @@ -508,13 +508,13 @@ static St s[2]; // ARRAY: @__kmpc_fork_call( // ARRAY: call i8* @__kmpc_threadprivate_cached( -// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* bitcast ([2 x i32]* @{{.+}} to i8*), i64 8, i32 4, i1 false) +// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.+}}, i8* align 4 bitcast ([2 x i32]* @{{.+}} to i8*), i64 8, i1 false) // ARRAY: call dereferenceable(8) %struct.St* @{{.+}}(%struct.St* %{{.+}}, %struct.St* dereferenceable(8) %{{.+}}) // TLS-ARRAY: @__kmpc_fork_call( // TLS-ARRAY: [[REFT:%.+]] = load [2 x i32]*, [2 x i32]** [[ADDR:%.+]], // TLS-ARRAY: [[REF:%.+]] = bitcast [2 x i32]* [[REFT]] to i8* -// TLS-ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast ([2 x i32]* @{{.+}} to i8*), i8* [[REF]], i64 8, i32 4, i1 false) +// TLS-ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast ([2 x i32]* @{{.+}} to i8*), i8* align 4 [[REF]], i64 8, i1 false) // TLS-ARRAY: call dereferenceable(8) %struct.St* @{{.+}}(%struct.St* %{{.+}}, %struct.St* dereferenceable(8) %{{.+}}) #pragma omp threadprivate(a, s) Index: test/OpenMP/parallel_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/parallel_firstprivate_codegen.cpp +++ test/OpenMP/parallel_firstprivate_codegen.cpp @@ -366,7 +366,7 @@ // CHECK-64: [[SIVAR7_CONV:%.+]] = bitcast i64* [[SIVAR7_PRIV]] to i32* // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]], // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_BEGIN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_REF]] to [[S_FLOAT_TY]]* // CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 @@ -459,7 +459,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]], align 128 // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]], i{{[0-9]+}} {{[0-9]+}}, i{{[0-9]+}} 128, +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align 128 [[VEC_DEST]], i8* align 128 [[VEC_SRC]], i{{[0-9]+}} {{[0-9]+}}, i1 // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_BEGIN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_REF]] to [[S_INT_TY]]* // CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 @@ -507,7 +507,7 @@ // ARRAY-DAG: store double* %{{.+}}, double** [[PRIV_VLA2]], // ARRAY: call i8* @llvm.stacksave() // ARRAY: [[SIZE:%.+]] = mul nuw i64 %{{.+}}, 8 -// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* %{{.+}}, i64 [[SIZE]], i32 128, i1 false) +// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 %{{.+}}, i8* align 128 %{{.+}}, i64 [[SIZE]], i1 false) #pragma omp parallel firstprivate(a, s, vla1, vla2) s[0].St_func(s, n, vla1); ; @@ -523,7 +523,7 @@ // ARRAY-DAG: store double* %{{.+}}, double** [[PRIV_VLA2]], // ARRAY: call i8* @llvm.stacksave() // ARRAY: [[SIZE:%.+]] = mul nuw i64 %{{.+}}, 8 -// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.+}}, i8* %{{.+}}, i64 [[SIZE]], i32 128, i1 false) +// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 %{{.+}}, i8* align 128 %{{.+}}, i64 [[SIZE]], i1 false) #endif Index: test/OpenMP/parallel_reduction_codegen.cpp =================================================================== --- test/OpenMP/parallel_reduction_codegen.cpp +++ test/OpenMP/parallel_reduction_codegen.cpp @@ -414,7 +414,7 @@ // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_REF]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_PRIV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // var1 = var1.operator &&(var1_reduction); // CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_REF]]) @@ -430,7 +430,7 @@ // CHECK: call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // t_var1 = min(t_var1, t_var1_reduction); // CHECK: [[T_VAR1_VAL:%.+]] = load float, float* [[T_VAR1_REF]], @@ -468,7 +468,7 @@ // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_REF]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_PRIV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // CHECK: call void @__kmpc_end_critical( // var1 = var1.operator &&(var1_reduction); @@ -486,7 +486,7 @@ // CHECK: call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // CHECK: call void @__kmpc_end_critical( // t_var1 = min(t_var1, t_var1_reduction); @@ -568,7 +568,7 @@ // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* [[VAR_LHS]], [[S_FLOAT_TY]]* dereferenceable(4) [[VAR_RHS]]) // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_LHS]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // var1_lhs = var1_lhs.operator &&(var1_rhs); // CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* [[VAR1_LHS]]) @@ -584,7 +584,7 @@ // CHECK: call void @{{.+}}([[S_FLOAT_TY]]* [[COND_LVALUE:%.+]], float [[CONV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_LHS]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // t_var1_lhs = min(t_var1_lhs, t_var1_rhs); // CHECK: [[T_VAR1_LHS_VAL:%.+]] = load float, float* [[T_VAR1_LHS]], @@ -737,7 +737,7 @@ // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]* dereferenceable(4) [[VAR_PRIV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // var1 = var1.operator &&(var1_reduction); // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_REF]]) @@ -752,7 +752,7 @@ // CHECK: call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // t_var1 = min(t_var1, t_var1_reduction); // CHECK: [[T_VAR1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_REF]], @@ -778,7 +778,7 @@ // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]* dereferenceable(4) [[VAR_PRIV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // CHECK: call void @__kmpc_end_critical( // var1 = var1.operator &&(var1_reduction); @@ -796,7 +796,7 @@ // CHECK: call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // CHECK: call void @__kmpc_end_critical( // t_var1 = min(t_var1, t_var1_reduction); @@ -864,7 +864,7 @@ // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_LHS]], [[S_INT_TY]]* dereferenceable(4) [[VAR_RHS]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_LHS]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // var1_lhs = var1_lhs.operator &&(var1_rhs); // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_LHS]]) @@ -880,7 +880,7 @@ // CHECK: call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_LHS]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // t_var1_lhs = min(t_var1_lhs, t_var1_rhs); // CHECK: [[T_VAR1_LHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_LHS]], Index: test/OpenMP/sections_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/sections_firstprivate_codegen.cpp +++ test/OpenMP/sections_firstprivate_codegen.cpp @@ -225,7 +225,7 @@ // firstprivate vec(vec) // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), // firstprivate s_arr(s_arr) // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 @@ -292,7 +292,7 @@ // firstprivate vec(vec) // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]], // firstprivate s_arr(s_arr) // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 Index: test/OpenMP/sections_lastprivate_codegen.cpp =================================================================== --- test/OpenMP/sections_lastprivate_codegen.cpp +++ test/OpenMP/sections_lastprivate_codegen.cpp @@ -342,7 +342,7 @@ // original vec[]=private_vec[]; // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]], // original s_arr[]=private_s_arr[]; // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 Index: test/OpenMP/sections_reduction_codegen.cpp =================================================================== --- test/OpenMP/sections_reduction_codegen.cpp +++ test/OpenMP/sections_reduction_codegen.cpp @@ -303,7 +303,7 @@ // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]* dereferenceable(4) [[VAR_PRIV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // var1 = var1.operator &&(var1_reduction); // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_REF]]) @@ -319,7 +319,7 @@ // CHECK: call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // t_var1 = min(t_var1, t_var1_reduction); // CHECK: [[T_VAR1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_REF]], @@ -345,7 +345,7 @@ // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]* dereferenceable(4) [[VAR_PRIV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // CHECK: call void @__kmpc_end_critical( // var1 = var1.operator &&(var1_reduction); @@ -363,7 +363,7 @@ // CHECK: call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // CHECK: call void @__kmpc_end_critical( // t_var1 = min(t_var1, t_var1_reduction); @@ -430,7 +430,7 @@ // CHECK: [[UP:%.+]] = call dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* [[VAR_LHS]], [[S_INT_TY]]* dereferenceable(4) [[VAR_RHS]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_LHS]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // var1_lhs = var1_lhs.operator &&(var1_rhs); // CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* [[VAR1_LHS]]) @@ -446,7 +446,7 @@ // CHECK: call void @{{.+}}([[S_INT_TY]]* [[COND_LVALUE:%.+]], i32 [[CONV]]) // CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_LHS]] to i8* // CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[BC1]], i8* [[BC2]], i64 4, i32 4, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false) // t_var1_lhs = min(t_var1_lhs, t_var1_rhs); // CHECK: [[T_VAR1_LHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_LHS]], Index: test/OpenMP/single_codegen.cpp =================================================================== --- test/OpenMP/single_codegen.cpp +++ test/OpenMP/single_codegen.cpp @@ -180,7 +180,7 @@ // CHECK: [[DST_A2_ADDR:%.+]] = load i8*, i8** [[DST_A2_ADDR_REF]], // CHECK: [[SRC_A2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 // CHECK: [[SRC_A2_ADDR:%.+]] = load i8*, i8** [[SRC_A2_ADDR_REF]], -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[DST_A2_ADDR]], i8* [[SRC_A2_ADDR]], i{{[0-9]+}} 2, i{{[0-9]+}} 1, i1 false) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align 1 [[DST_A2_ADDR]], i8* align 1 [[SRC_A2_ADDR]], i{{[0-9]+}} 2, i1 false) // CHECK: [[DST_TC2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 4 // CHECK: [[DST_TC2_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_TC2_ADDR_REF]], // CHECK: [[DST_TC2_ADDR:%.+]] = bitcast i8* [[DST_TC2_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* Index: test/OpenMP/single_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/single_firstprivate_codegen.cpp +++ test/OpenMP/single_firstprivate_codegen.cpp @@ -192,7 +192,7 @@ // firstprivate vec(vec) // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), // firstprivate s_arr(s_arr) // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 @@ -256,7 +256,7 @@ // firstprivate vec(vec) // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]], // firstprivate s_arr(s_arr) // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 Index: test/OpenMP/target_enter_data_depend_codegen.cpp =================================================================== --- test/OpenMP/target_enter_data_depend_codegen.cpp +++ test/OpenMP/target_enter_data_depend_codegen.cpp @@ -68,19 +68,19 @@ // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0 // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]], // CK1: [[BC1:%.+]] = bitcast %struct.anon* [[CAPTURES]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[SHAREDS_REF]], i8* [[BC1]], i[[sz]] 4, i32 4, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align 4 [[SHAREDS_REF]], i8* align 4 [[BC1]], i[[sz]] 4, i1 false) // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 1 // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 0 // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 1 // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 2 // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([1 x i[[sz]]]* [[SIZE00]] to i8*), i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([1 x i[[sz]]]* [[SIZE00]] to i8*), i[[sz]] {{8|4}}, i1 false) // CK1: [[DEP:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] @@ -124,19 +124,19 @@ // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0 // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]], // CK1: [[BC1:%.+]] = bitcast %struct.anon{{.+}}* [[CAPTURES]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[SHAREDS_REF]], i8* [[BC1]], i[[sz]] 1, i32 1, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align 1 [[SHAREDS_REF]], i8* align 1 [[BC1]], i[[sz]] 1, i1 false) // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1 // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([1 x i[[sz]]]* [[SIZE02]] to i8*), i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([1 x i[[sz]]]* [[SIZE02]] to i8*), i[[sz]] {{8|4}}, i1 false) // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] @@ -198,15 +198,15 @@ // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* // CK1: [[BC_SIZES:%.+]] = bitcast i[[sz]]* [[GEPS0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* [[BC_SIZES]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} [[BC_SIZES]], i[[sz]] {{8|4}}, i1 false) // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] @@ -272,14 +272,14 @@ // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_BASEPTRS]] to i8* // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{16|8}}, i1 false) // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_PTRS]] to i8* // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{16|8}}, i1 false) // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [2 x i[[sz]]]* [[PRIVS_SIZES]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([2 x i[[sz]]]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([2 x i[[sz]]]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i1 false) // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 // CK1: [[BC_ADR:%.+]] = ptrtoint double* %{{.+}} to i[[sz]] Index: test/OpenMP/target_exit_data_depend_codegen.cpp =================================================================== --- test/OpenMP/target_exit_data_depend_codegen.cpp +++ test/OpenMP/target_exit_data_depend_codegen.cpp @@ -68,19 +68,19 @@ // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0 // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]], // CK1: [[BC1:%.+]] = bitcast %struct.anon* [[CAPTURES]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[SHAREDS_REF]], i8* [[BC1]], i[[sz]] 4, i32 4, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align 4 [[SHAREDS_REF]], i8* align 4 [[BC1]], i[[sz]] 4, i1 false) // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 1 // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 0 // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 1 // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 2 // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([1 x i[[sz]]]* [[SIZE00]] to i8*), i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([1 x i[[sz]]]* [[SIZE00]] to i8*), i[[sz]] {{8|4}}, i1 false) // CK1: [[DEP:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] @@ -124,19 +124,19 @@ // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0 // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]], // CK1: [[BC1:%.+]] = bitcast %struct.anon{{.+}}* [[CAPTURES]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[SHAREDS_REF]], i8* [[BC1]], i[[sz]] 1, i32 1, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align 1 [[SHAREDS_REF]], i8* align 1 [[BC1]], i[[sz]] 1, i1 false) // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1 // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([1 x i[[sz]]]* [[SIZE02]] to i8*), i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([1 x i[[sz]]]* [[SIZE02]] to i8*), i[[sz]] {{8|4}}, i1 false) // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] @@ -198,15 +198,15 @@ // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* // CK1: [[BC_SIZES:%.+]] = bitcast i[[sz]]* [[GEPS0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* [[BC_SIZES]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} [[BC_SIZES]], i[[sz]] {{8|4}}, i1 false) // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] @@ -272,14 +272,14 @@ // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_BASEPTRS]] to i8* // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{16|8}}, i1 false) // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_PTRS]] to i8* // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{16|8}}, i1 false) // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [2 x i[[sz]]]* [[PRIVS_SIZES]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([2 x i[[sz]]]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([2 x i[[sz]]]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i1 false) // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 // CK1: [[BC_ADR:%.+]] = ptrtoint double* %{{.+}} to i[[sz]] Index: test/OpenMP/target_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/target_firstprivate_codegen.cpp +++ test/OpenMP/target_firstprivate_codegen.cpp @@ -281,7 +281,7 @@ // firstprivate(b): memcpy(b_priv,b_in) // TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x float]* [[B_PRIV]] to i8* // TCHECK: [[B_ADDR_REF_BCAST:%.+]] = bitcast [10 x float]* [[B_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_ADDR_REF_BCAST]], {{.+}}) + // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_ADDR_REF_BCAST]], {{.+}}) // TCHECK: [[RET_STACK:%.+]] = call i8* @llvm.stacksave() // TCHECK: store i8* [[RET_STACK]], i8** [[SSTACK]], @@ -291,12 +291,12 @@ // TCHECK: [[BN_COPY_SZ:%.+]] = mul{{.+}} i{{[0-9]+}} [[BN_SZ_VAL]], 4 // TCHECK: [[BN_PRIV__BCAST:%.+]] = bitcast float* [[BN_PRIV]] to i8* // TCHECK: [[BN_REF_IN_BCAST:%.+]] = bitcast float* [[BN_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[BN_PRIV__BCAST]], i8* [[BN_REF_IN_BCAST]], i{{[0-9]+}} [[BN_COPY_SZ]],{{.+}}) + // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BN_PRIV__BCAST]], i8* align {{[0-9]+}} [[BN_REF_IN_BCAST]], i{{[0-9]+}} [[BN_COPY_SZ]],{{.+}}) // firstprivate(c) // TCHECK: [[C_PRIV_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_PRIV]] to i8* // TCHECK: [[C_IN_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[C_PRIV_BCAST]], i8* [[C_IN_BCAST]],{{.+}}) + // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[C_PRIV_BCAST]], i8* align {{[0-9]+}} [[C_IN_BCAST]],{{.+}}) // firstprivate(cn) // TCHECK: [[CN_SZ:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SZ1_VAL]], [[CN_SZ2_VAL]] @@ -305,12 +305,12 @@ // TCHECK: [[CN_SZ2_CPY:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SZ2]], 8 // TCHECK: [[CN_PRIV_BCAST:%.+]] = bitcast double* [[CN_PRIV]] to i8* // TCHECK: [[CN_IN_BCAST:%.+]] = bitcast double* [[CN_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[CN_PRIV_BCAST]], i8* [[CN_IN_BCAST]], i{{[0-9]+}} [[CN_SZ2_CPY]],{{.+}}) + // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[CN_PRIV_BCAST]], i8* align {{[0-9]+}} [[CN_IN_BCAST]], i{{[0-9]+}} [[CN_SZ2_CPY]],{{.+}}) // firstprivate(d) // TCHECK: [[D_PRIV_BCAST:%.+]] = bitcast [[TT]]* [[D_PRIV]] to i8* // TCHECK: [[D_IN_BCAST:%.+]] = bitcast [[TT]]* [[D_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[D_PRIV_BCAST]], i8* [[D_IN_BCAST]],{{.+}}) + // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[D_PRIV_BCAST]], i8* align {{[0-9]+}} [[D_IN_BCAST]],{{.+}}) #pragma omp target firstprivate(ptr) @@ -391,7 +391,7 @@ // firstprivate(b) // TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8* // TCHECK: [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8* -// TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_IN_BCAST]],{{.+}}) +// TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_IN_BCAST]],{{.+}}) // TCHECK: ret void @@ -499,7 +499,7 @@ // TCHECK: [[C_SZ_CPY:%.+]] = mul{{.+}} i{{[0-9]+}} [[C_SZ2]], 2 // TCHECK: [[C_PRIV_BCAST:%.+]] = bitcast i{{[0-9]+}}* [[C_PRIV]] to i8* // TCHECK: [[C_IN_BCAST:%.+]] = bitcast i{{[0-9]+}}* [[C_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[C_PRIV_BCAST]], i8* [[C_IN_BCAST]],{{.+}}) + // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[C_PRIV_BCAST]], i8* align {{[0-9]+}} [[C_IN_BCAST]],{{.+}}) // finish // TCHECK: [[RELOAD_SSTACK:%.+]] = load i8*, i8** [[SSTACK]], @@ -594,7 +594,7 @@ // firstprivate(b) // TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8* // TCHECK: [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8* -// TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_IN_BCAST]],{{.+}}) +// TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_IN_BCAST]],{{.+}}) // TCHECK: ret void Index: test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp +++ test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp @@ -236,7 +236,7 @@ // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], @@ -322,7 +322,7 @@ // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], Index: test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp =================================================================== --- test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp +++ test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp @@ -259,7 +259,7 @@ // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]], // CHECK: call void @__kmpc_for_static_fini( // lastprivates @@ -273,7 +273,7 @@ // CHECK-32: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -284,7 +284,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -293,7 +293,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], // CHECK-64: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], // CHECK-32: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR]], @@ -361,7 +361,7 @@ // CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST1]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST1]], // CHECK: call void @__kmpc_for_static_fini( // lastprivates @@ -375,7 +375,7 @@ // CHECK-32: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR1]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -386,7 +386,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -395,6 +395,6 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: ret void #endif Index: test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp +++ test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp @@ -236,7 +236,7 @@ // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], @@ -322,7 +322,7 @@ // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], Index: test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp =================================================================== --- test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp +++ test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp @@ -260,7 +260,7 @@ // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]], // CHECK: call void @__kmpc_for_static_fini( // lastprivates @@ -274,7 +274,7 @@ // CHECK-32: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -285,7 +285,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -294,7 +294,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], // CHECK-64: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], // CHECK-32: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR]], @@ -362,7 +362,7 @@ // CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST1]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST1]], // CHECK: call void @__kmpc_for_static_fini( // lastprivates @@ -376,7 +376,7 @@ // CHECK-32: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR1]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -387,7 +387,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -396,6 +396,6 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: ret void #endif Index: test/OpenMP/target_update_depend_codegen.cpp =================================================================== --- test/OpenMP/target_update_depend_codegen.cpp +++ test/OpenMP/target_update_depend_codegen.cpp @@ -68,19 +68,19 @@ // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0 // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]], // CK1: [[BC1:%.+]] = bitcast %struct.anon* [[CAPTURES]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[SHAREDS_REF]], i8* [[BC1]], i[[sz]] 4, i32 4, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align 4 [[SHAREDS_REF]], i8* align 4 [[BC1]], i[[sz]] 4, i1 false) // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 1 // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 0 // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 1 // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 2 // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([1 x i[[sz]]]* [[SIZE00]] to i8*), i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([1 x i[[sz]]]* [[SIZE00]] to i8*), i[[sz]] {{8|4}}, i1 false) // CK1: [[DEP:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] @@ -124,19 +124,19 @@ // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0 // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]], // CK1: [[BC1:%.+]] = bitcast %struct.anon{{.+}}* [[CAPTURES]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[SHAREDS_REF]], i8* [[BC1]], i[[sz]] 1, i32 1, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align 1 [[SHAREDS_REF]], i8* align 1 [[BC1]], i[[sz]] 1, i1 false) // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1 // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([1 x i[[sz]]]* [[SIZE02]] to i8*), i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([1 x i[[sz]]]* [[SIZE02]] to i8*), i[[sz]] {{8|4}}, i1 false) // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] @@ -198,15 +198,15 @@ // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{8|4}}, i1 false) // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* // CK1: [[BC_SIZES:%.+]] = bitcast i[[sz]]* [[GEPS0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* [[BC_SIZES]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} [[BC_SIZES]], i[[sz]] {{8|4}}, i1 false) // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] @@ -272,14 +272,14 @@ // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_BASEPTRS]] to i8* // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_BASEPTRS]], i8* align {{8|4}} [[BC_BASEPTRS]], i[[sz]] {{16|8}}, i1 false) // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_PTRS]] to i8* // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{16|8}}, i1 false) // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [2 x i[[sz]]]* [[PRIVS_SIZES]] to i8* - // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([2 x i[[sz]]]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([2 x i[[sz]]]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i1 false) // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 // CK1: [[BC_ADR:%.+]] = ptrtoint double* %{{.+}} to i[[sz]] Index: test/OpenMP/task_codegen.cpp =================================================================== --- test/OpenMP/task_codegen.cpp +++ test/OpenMP/task_codegen.cpp @@ -38,7 +38,7 @@ // CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_PTR]] // CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS]]* [[CAPTURES]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[BITCAST]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[BITCAST]], i64 16, i1 false) // CHECK: [[PRIORITY_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 4 // CHECK: [[PRIORITY:%.+]] = bitcast %union{{.+}}* [[PRIORITY_REF_PTR]] to i32* // CHECK: store i32 {{.+}}, i32* [[PRIORITY]] @@ -55,7 +55,7 @@ // CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_PTR]] // CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS1]]* [[CAPTURES]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[BITCAST]], i64 8, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[BITCAST]], i64 8, i1 false) // CHECK: [[DEP:%.*]] = getelementptr inbounds [4 x [[KMP_DEPEND_INFO]]], [4 x [[KMP_DEPEND_INFO]]]* [[DEPENDENCIES:%.*]], i64 0, i64 0 // CHECK: [[T0:%.*]] = getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* [[DEP]], i32 0, i32 0 // CHECK: store i64 ptrtoint (i32* @{{.+}} to i64), i64* [[T0]] Index: test/OpenMP/task_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/task_firstprivate_codegen.cpp +++ test/OpenMP/task_firstprivate_codegen.cpp @@ -207,7 +207,7 @@ // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 40, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 40, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). // Also copy address of private copy to the corresponding shareds reference. @@ -361,7 +361,7 @@ // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 32, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). // CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 Index: test/OpenMP/taskgroup_task_reduction_codegen.cpp =================================================================== --- test/OpenMP/taskgroup_task_reduction_codegen.cpp +++ test/OpenMP/taskgroup_task_reduction_codegen.cpp @@ -64,7 +64,7 @@ // CHECK-DAG: store i8* bitcast (void (i8*, i8*)* [[ACOMB:@.+]] to i8*), i8** [[TMP9]], // CHECK-DAG: [[TMP10:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 5 // CHECK-DAG: [[TMP11:%.+]] = bitcast i32* [[TMP10]] to i8* -// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* [[TMP11]], i8 0, i64 4, i32 8, i1 false) +// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP11]], i8 0, i64 4, i1 false) // CHECK-DAG: [[TMP13:%.+]] = bitcast float* [[B]] to i8* // CHECK-DAG: store i8* [[TMP13]], i8** [[TMP12:%[^,]+]], // CHECK-DAG: [[TMP12]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB:%[^,]+]], i32 0, i32 0 @@ -79,7 +79,7 @@ // CHECK-DAG: store i8* bitcast (void (i8*, i8*)* [[BCOMB:@.+]] to i8*), i8** [[TMP17]], // CHECK-DAG: [[TMP18:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 5 // CHECK-DAG: [[TMP19:%.+]] = bitcast i32* [[TMP18]] to i8* -// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* [[TMP19]], i8 0, i64 4, i32 8, i1 false) +// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP19]], i8 0, i64 4, i1 false) // CHECK-DAG: [[TMP21:%.+]] = bitcast i32* [[ARGC_ADDR]] to i8* // CHECK-DAG: store i8* [[TMP21]], i8** [[TMP20:%[^,]+]], // CHECK-DAG: [[TMP20]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC:%[^,]+]], i32 0, i32 0 @@ -94,7 +94,7 @@ // CHECK-DAG: store i8* bitcast (void (i8*, i8*)* [[ARGCCOMB:@.+]] to i8*), i8** [[TMP25]], // CHECK-DAG: [[TMP26:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 5 // CHECK-DAG: [[TMP27:%.+]] = bitcast i32* [[TMP26]] to i8* -// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* [[TMP27]], i8 0, i64 4, i32 8, i1 false) +// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 4, i1 false) // CHECK-DAG: [[TMP28:%.+]] = bitcast [3 x [[T1]]]* [[RD_IN1]] to i8* // CHECK-DAG: [[TMP29:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* [[TMP28]]) // DEBUG-DAG: call void @llvm.dbg.declare(metadata i8** [[TD1]], @@ -114,7 +114,7 @@ // CHECK-DAG: store i8* bitcast (void (i8*, i8*)* [[CCOMB:@.+]] to i8*), i8** [[TMP35]], // CHECK-DAG: [[TMP36:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 5 // CHECK-DAG: [[TMP37:%.+]] = bitcast i32* [[TMP36]] to i8* -// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* [[TMP37]], i8 0, i64 4, i32 8, i1 false) +// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP37]], i8 0, i64 4, i1 false) // CHECK-DAG: [[TMP39:%.+]] = bitcast i16* [[VLA]] to i8* // CHECK-DAG: store i8* [[TMP39]], i8** [[TMP38:%[^,]+]], // CHECK-DAG: [[TMP38]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA:%[^,]+]], i32 0, i32 0 Index: test/OpenMP/taskloop_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/taskloop_firstprivate_codegen.cpp +++ test/OpenMP/taskloop_firstprivate_codegen.cpp @@ -206,7 +206,7 @@ // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 40, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 40, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). // Also copy address of private copy to the corresponding shareds reference. @@ -376,7 +376,7 @@ // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 32, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). // CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 Index: test/OpenMP/taskloop_lastprivate_codegen.cpp =================================================================== --- test/OpenMP/taskloop_lastprivate_codegen.cpp +++ test/OpenMP/taskloop_lastprivate_codegen.cpp @@ -198,7 +198,7 @@ // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 40, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 40, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). // Also copy address of private copy to the corresponding shareds reference. @@ -287,18 +287,18 @@ // CHECK-NEXT: br i1 // CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8* // CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % // CHECK: load i32, i32* % // CHECK: store i32 %{{.+}}, i32* % // CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* % // CHECK: phi [[S_DOUBLE_TY]]* // CHECK: phi [[S_DOUBLE_TY]]* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % // CHECK: icmp eq [[S_DOUBLE_TY]]* % // CHECK-NEXT: br i1 // CHECK: bitcast [2 x i32]* %{{.+}} to i8* // CHECK: bitcast [2 x i32]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % // CHECK: load i32, i32* % // CHECK: store i32 %{{.+}}, i32* % // CHECK: br label @@ -372,7 +372,7 @@ // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 32, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). // CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 @@ -453,16 +453,16 @@ // CHECK: store i32 %{{.+}}, i32* % // CHECK: bitcast [2 x i32]* %{{.+}} to i8* // CHECK: bitcast [2 x i32]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % // CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* % // CHECK: phi [[S_INT_TY]]* // CHECK: phi [[S_INT_TY]]* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % // CHECK: icmp eq [[S_INT_TY]]* % // CHECK-NEXT: br i1 // CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8* // CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % // CHECK: br label // CHECK: ret Index: test/OpenMP/taskloop_reduction_codegen.cpp =================================================================== --- test/OpenMP/taskloop_reduction_codegen.cpp +++ test/OpenMP/taskloop_reduction_codegen.cpp @@ -79,7 +79,7 @@ // CHECK-DAG: store i8* bitcast (void (i8*, i8*)* [[RED_COMB1:@.+]] to i8*), i8** [[TMP25]], // CHECK-DAG: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 // CHECK-DAG: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8* -// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* [[TMP27]], i8 0, i64 4, i32 8, i1 false) +// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 4, i1 false) // CHECK-DAG: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 0 // CHECK-DAG: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, % // CHECK-DAG: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 [[LB_ADD_LEN]] @@ -116,7 +116,7 @@ // CHECK-DAG: store i8* bitcast (void (i8*, i8*)* [[RED_COMB3:@.+]] to i8*), i8** [[TMP49]], // CHECK-DAG: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 // CHECK-DAG: [[TMP51:%.*]] = bitcast i32* [[TMP50]] to i8* -// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* [[TMP51]], i8 0, i64 4, i32 8, i1 false) +// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP51]], i8 0, i64 4, i1 false) // CHECK-DAG: [[TMP53:%.*]] = bitcast float* [[VLA]] to i8* // CHECK-DAG: store i8* [[TMP53]], i8** [[TMP52:%[^,]+]], // CHECK-DAG: [[TMP52]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0 Index: test/OpenMP/taskloop_simd_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/taskloop_simd_firstprivate_codegen.cpp +++ test/OpenMP/taskloop_simd_firstprivate_codegen.cpp @@ -206,7 +206,7 @@ // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 40, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 40, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). // Also copy address of private copy to the corresponding shareds reference. @@ -376,7 +376,7 @@ // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 32, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). // CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 Index: test/OpenMP/taskloop_simd_lastprivate_codegen.cpp =================================================================== --- test/OpenMP/taskloop_simd_lastprivate_codegen.cpp +++ test/OpenMP/taskloop_simd_lastprivate_codegen.cpp @@ -198,7 +198,7 @@ // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 40, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 40, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). // Also copy address of private copy to the corresponding shareds reference. @@ -287,18 +287,18 @@ // CHECK-NEXT: br i1 // CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8* // CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % // CHECK: load i32, i32* % // CHECK: store i32 %{{.+}}, i32* % // CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* % // CHECK: phi [[S_DOUBLE_TY]]* // CHECK: phi [[S_DOUBLE_TY]]* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % // CHECK: icmp eq [[S_DOUBLE_TY]]* % // CHECK-NEXT: br i1 // CHECK: bitcast [2 x i32]* %{{.+}} to i8* // CHECK: bitcast [2 x i32]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % // CHECK: load i32, i32* % // CHECK: store i32 %{{.+}}, i32* % // CHECK: br label @@ -372,7 +372,7 @@ // CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], // CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 32, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). // CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 @@ -453,16 +453,16 @@ // CHECK: store i32 %{{.+}}, i32* % // CHECK: bitcast [2 x i32]* %{{.+}} to i8* // CHECK: bitcast [2 x i32]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % // CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* % // CHECK: phi [[S_INT_TY]]* // CHECK: phi [[S_INT_TY]]* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % // CHECK: icmp eq [[S_INT_TY]]* % // CHECK-NEXT: br i1 // CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8* // CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % // CHECK: br label // CHECK: ret Index: test/OpenMP/taskloop_simd_reduction_codegen.cpp =================================================================== --- test/OpenMP/taskloop_simd_reduction_codegen.cpp +++ test/OpenMP/taskloop_simd_reduction_codegen.cpp @@ -79,7 +79,7 @@ // CHECK-DAG: store i8* bitcast (void (i8*, i8*)* [[RED_COMB1:@.+]] to i8*), i8** [[TMP25]], // CHECK-DAG: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 // CHECK-DAG: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8* -// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* [[TMP27]], i8 0, i64 4, i32 8, i1 false) +// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 4, i1 false) // CHECK-DAG: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 0 // CHECK-DAG: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, % // CHECK-DAG: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 [[LB_ADD_LEN]] @@ -116,7 +116,7 @@ // CHECK-DAG: store i8* bitcast (void (i8*, i8*)* [[RED_COMB3:@.+]] to i8*), i8** [[TMP49]], // CHECK-DAG: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 // CHECK-DAG: [[TMP51:%.*]] = bitcast i32* [[TMP50]] to i8* -// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* [[TMP51]], i8 0, i64 4, i32 8, i1 false) +// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP51]], i8 0, i64 4, i1 false) // CHECK-DAG: [[TMP53:%.*]] = bitcast float* [[VLA]] to i8* // CHECK-DAG: store i8* [[TMP53]], i8** [[TMP52:%[^,]+]], // CHECK-DAG: [[TMP52]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0 Index: test/OpenMP/teams_distribute_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/teams_distribute_firstprivate_codegen.cpp +++ test/OpenMP/teams_distribute_firstprivate_codegen.cpp @@ -230,7 +230,7 @@ // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], @@ -316,7 +316,7 @@ // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], Index: test/OpenMP/teams_distribute_lastprivate_codegen.cpp =================================================================== --- test/OpenMP/teams_distribute_lastprivate_codegen.cpp +++ test/OpenMP/teams_distribute_lastprivate_codegen.cpp @@ -247,7 +247,7 @@ // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]], // CHECK: call void @__kmpc_for_static_fini( // lastprivates @@ -260,7 +260,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -271,7 +271,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -280,7 +280,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], // CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], // CHECK: ret void @@ -347,7 +347,7 @@ // CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST1]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST1]], // CHECK: call void @__kmpc_for_static_fini( // lastprivates @@ -360,7 +360,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -371,7 +371,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -380,6 +380,6 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: ret void #endif Index: test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp +++ test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -264,7 +264,7 @@ // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], @@ -330,7 +330,7 @@ // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], @@ -415,7 +415,7 @@ // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], @@ -476,7 +476,7 @@ // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], Index: test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp =================================================================== --- test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp +++ test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp @@ -304,7 +304,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -315,7 +315,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -324,7 +324,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], // CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], // CHECK: ret void @@ -381,7 +381,7 @@ // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]], // CHECK: call void @__kmpc_for_static_fini( @@ -395,7 +395,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -406,7 +406,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -415,7 +415,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], // CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], // CHECK: ret void @@ -484,7 +484,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -495,7 +495,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -504,7 +504,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: ret void // CHECK: define internal void [[TPAR_OUTL:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]]) @@ -560,7 +560,7 @@ // CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST1]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST1]], // CHECK: call void @__kmpc_for_static_fini( @@ -574,7 +574,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -585,7 +585,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -594,7 +594,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: ret void #endif Index: test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -267,7 +267,7 @@ // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], @@ -333,7 +333,7 @@ // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], @@ -418,7 +418,7 @@ // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], @@ -479,7 +479,7 @@ // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], Index: test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp =================================================================== --- test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp +++ test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -311,7 +311,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -322,7 +322,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -331,7 +331,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], // CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], // CHECK: ret void @@ -388,7 +388,7 @@ // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]], // CHECK: call void @__kmpc_for_static_fini( @@ -405,7 +405,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -416,7 +416,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -425,7 +425,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], // CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], // CHECK: ret void @@ -494,7 +494,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -505,7 +505,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -514,7 +514,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: ret void // CHECK: define internal void [[TPAR_OUTL:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]]) @@ -570,7 +570,7 @@ // CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST1]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST1]], // CHECK: call void @__kmpc_for_static_fini( @@ -587,7 +587,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -598,7 +598,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -607,7 +607,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: ret void // CHECK: !{!"llvm.loop.vectorize.enable", i1 true} Index: test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp +++ test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp @@ -230,7 +230,7 @@ // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], @@ -316,7 +316,7 @@ // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 // CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) // CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], Index: test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp =================================================================== --- test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp +++ test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp @@ -248,7 +248,7 @@ // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]], // CHECK: call void @__kmpc_for_static_fini( // lastprivates @@ -261,7 +261,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -272,7 +272,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -281,7 +281,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], // CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], // CHECK: ret void @@ -348,7 +348,7 @@ // CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8* // CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]], +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_PTR_BCAST1]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST1]], // CHECK: call void @__kmpc_for_static_fini( // lastprivates @@ -361,7 +361,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]], // CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* // CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BCAST_VEC_ADDR_REF]], i8* align {{[0-9]+}} [[BCAST_VEC_PRIV]], // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* // CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -372,7 +372,7 @@ // CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} // CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* // CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[S_ARR_DST_BCAST]], i8* align {{[0-9]+}} [[S_ARR_SRC_BCAST]]{{.+}}) // CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 // CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} // CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] @@ -381,7 +381,7 @@ // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], // CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8* // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}}) // CHECK: ret void // CHECK: !{!"llvm.loop.vectorize.enable", i1 true} #endif Index: test/OpenMP/teams_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/teams_firstprivate_codegen.cpp +++ test/OpenMP/teams_firstprivate_codegen.cpp @@ -172,7 +172,7 @@ // CHECK-64: [[SIVAR7_CONV:%.+]] = bitcast i64* [[SIVAR7_PRIV]] to i32* // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]], // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_BEGIN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_REF]] to [[S_FLOAT_TY]]* // CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 @@ -224,7 +224,7 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]], align 128 // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]], i{{[0-9]+}} {{[0-9]+}}, i{{[0-9]+}} 128, +// CHECK: call void @llvm.memcpy.{{.+}}(i8* align 128 [[VEC_DEST]], i8* align 128 [[VEC_SRC]], i{{[0-9]+}} {{[0-9]+}}, // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_BEGIN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_REF]] to [[S_INT_TY]]* // CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 @@ -284,7 +284,7 @@ // ARRAY-DAG: store double* %{{.+}}, double** [[PRIV_VLA2]], // ARRAY: call i8* @llvm.stacksave() // ARRAY: [[SIZE:%.+]] = mul nuw i{{[0-9]+}} %{{.+}}, 8 -// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i{{[0-9]+}}(i8* %{{.+}}, i8* %{{.+}}, i{{[0-9]+}} [[SIZE]], i32 128, i1 false) +// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i{{[0-9]+}}(i8* align 128 %{{.+}}, i8* align 128 %{{.+}}, i{{[0-9]+}} [[SIZE]], i1 false) #pragma omp target #pragma omp teams firstprivate(a, s, vla1, vla2) s[0].St_func(s, n, vla1); @@ -302,6 +302,6 @@ // ARRAY-DAG: store double* %{{.+}}, double** [[PRIV_VLA2]], // ARRAY: call i8* @llvm.stacksave() // ARRAY: [[SIZE:%.+]] = mul nuw i{{[0-9]+}} %{{.+}}, 8 -// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i{{[0-9]+}}(i8* %{{.+}}, i8* %{{.+}}, i{{[0-9]+}} [[SIZE]], i32 128, i1 false) +// ARRAY: call void @llvm.memcpy.p0i8.p0i8.i{{[0-9]+}}(i8* align 128 %{{.+}}, i8* align 128 %{{.+}}, i{{[0-9]+}} [[SIZE]], i1 false) #endif #endif