Index: lib/Headers/altivec.h =================================================================== --- lib/Headers/altivec.h +++ lib/Headers/altivec.h @@ -16361,27 +16361,32 @@ static inline __ATTRS_o_ai vector signed short vec_xl(signed long long __offset, signed short *__ptr) { - return *(unaligned_vec_sshort *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_sshort *)__addr; } static inline __ATTRS_o_ai vector unsigned short vec_xl(signed long long __offset, unsigned short *__ptr) { - return *(unaligned_vec_ushort *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_ushort *)__addr; } static inline __ATTRS_o_ai vector signed int vec_xl(signed long long __offset, signed int *__ptr) { - return *(unaligned_vec_sint *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_sint *)__addr; } static inline __ATTRS_o_ai vector unsigned int vec_xl(signed long long __offset, unsigned int *__ptr) { - return *(unaligned_vec_uint *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_uint *)__addr; } static inline __ATTRS_o_ai vector float vec_xl(signed long long __offset, float *__ptr) { - return *(unaligned_vec_float *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_float *)__addr; } #ifdef __VSX__ @@ -16391,17 +16396,20 @@ static inline __ATTRS_o_ai vector signed long long vec_xl(signed long long __offset, signed long long *__ptr) { - return *(unaligned_vec_sll *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_sll *)__addr; } static inline __ATTRS_o_ai vector unsigned long long vec_xl(signed long long __offset, unsigned long long *__ptr) { - return *(unaligned_vec_ull *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return 
*(unaligned_vec_ull *)__addr; } static inline __ATTRS_o_ai vector double vec_xl(signed long long __offset, double *__ptr) { - return *(unaligned_vec_double *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_double *)__addr; } #endif @@ -16411,12 +16419,14 @@ __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed __int128 vec_xl(signed long long __offset, signed __int128 *__ptr) { - return *(unaligned_vec_si128 *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_si128 *)__addr; } static inline __ATTRS_o_ai vector unsigned __int128 vec_xl(signed long long __offset, unsigned __int128 *__ptr) { - return *(unaligned_vec_ui128 *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_ui128 *)__addr; } #endif @@ -16513,50 +16523,58 @@ static inline __ATTRS_o_ai void vec_xst(vector signed short __vec, signed long long __offset, signed short *__ptr) { - *(unaligned_vec_sshort *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_sshort *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned short __vec, signed long long __offset, unsigned short *__ptr) { - *(unaligned_vec_ushort *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_ushort *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector signed int __vec, signed long long __offset, signed int *__ptr) { - *(unaligned_vec_sint *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_sint *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned int __vec, signed long long __offset, unsigned int *__ptr) { - *(unaligned_vec_uint *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_uint *)__addr = __vec; } static 
inline __ATTRS_o_ai void vec_xst(vector float __vec, signed long long __offset, float *__ptr) { - *(unaligned_vec_float *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_float *)__addr = __vec; } #ifdef __VSX__ static inline __ATTRS_o_ai void vec_xst(vector signed long long __vec, signed long long __offset, signed long long *__ptr) { - *(unaligned_vec_sll *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_sll *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned long long __vec, signed long long __offset, unsigned long long *__ptr) { - *(unaligned_vec_ull *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_ull *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector double __vec, signed long long __offset, double *__ptr) { - *(unaligned_vec_double *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_double *)__addr = __vec; } #endif @@ -16564,13 +16582,15 @@ static inline __ATTRS_o_ai void vec_xst(vector signed __int128 __vec, signed long long __offset, signed __int128 *__ptr) { - *(unaligned_vec_si128 *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_si128 *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned __int128 __vec, signed long long __offset, unsigned __int128 *__ptr) { - *(unaligned_vec_ui128 *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_ui128 *)__addr = __vec; } #endif Index: test/CodeGen/builtins-ppc-xl-xst.c =================================================================== --- test/CodeGen/builtins-ppc-xl-xst.c +++ test/CodeGen/builtins-ppc-xl-xst.c @@ -0,0 +1,490 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: 
powerpc-registered-target +// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -target-feature +power8-vector -triple powerpc64le-unknown-unknown \ +// RUN: -emit-llvm %s -o - | FileCheck %s -check-prefixes=CHECK,CHECK-P8 +#include <altivec.h> + +// CHECK-LABEL: @test1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[__VEC_ADDR_I:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__PTR_ADDR_I2:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: [[__ADDR_I3:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca <8 x i16>*, align 8 +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: store <8 x i16>* [[C:%.*]], <8 x i16>** [[C_ADDR]], align 8 +// CHECK-NEXT: store i16* [[PTR:%.*]], i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i16*, i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-NEXT: store i16* [[TMP0]], i16** [[__PTR_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i16*, i16** [[__PTR_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP1]] to i8* +// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]] +// CHECK-NEXT: store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <8 x i16>* +// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[TMP5]], align 1 +// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i16>*, <8 x i16>** [[C_ADDR]], align 8 +// 
CHECK-NEXT: store <8 x i16> [[TMP6]], <8 x i16>* [[TMP7]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>*, <8 x i16>** [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[TMP8]], align 16 +// CHECK-NEXT: [[TMP10:%.*]] = load i16*, i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* [[__VEC_ADDR_I]], align 16 +// CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-NEXT: store i16* [[TMP10]], i16** [[__PTR_ADDR_I2]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load i16*, i16** [[__PTR_ADDR_I2]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP11]] to i8* +// CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-NEXT: [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]] +// CHECK-NEXT: store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i16>, <8 x i16>* [[__VEC_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <8 x i16>* +// CHECK-NEXT: store <8 x i16> [[TMP14]], <8 x i16>* [[TMP16]], align 1 +// CHECK-NEXT: ret void +// +void test1(vector signed short *c, signed short *ptr) { + *c = vec_xl(3ll, ptr); + vec_xst(*c, 7ll, ptr); +} + +// CHECK-LABEL: @test2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[__VEC_ADDR_I:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__PTR_ADDR_I2:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: [[__ADDR_I3:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca <8 x i16>*, align 8 +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: store <8 x i16>* [[C:%.*]], <8 x i16>** [[C_ADDR]], align 8 +// 
CHECK-NEXT: store i16* [[PTR:%.*]], i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i16*, i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-NEXT: store i16* [[TMP0]], i16** [[__PTR_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i16*, i16** [[__PTR_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP1]] to i8* +// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]] +// CHECK-NEXT: store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <8 x i16>* +// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[TMP5]], align 1 +// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i16>*, <8 x i16>** [[C_ADDR]], align 8 +// CHECK-NEXT: store <8 x i16> [[TMP6]], <8 x i16>* [[TMP7]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>*, <8 x i16>** [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[TMP8]], align 16 +// CHECK-NEXT: [[TMP10:%.*]] = load i16*, i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* [[__VEC_ADDR_I]], align 16 +// CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-NEXT: store i16* [[TMP10]], i16** [[__PTR_ADDR_I2]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load i16*, i16** [[__PTR_ADDR_I2]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP11]] to i8* +// CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-NEXT: [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]] +// CHECK-NEXT: store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i16>, <8 x i16>* [[__VEC_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8 +// CHECK-NEXT: 
[[TMP16:%.*]] = bitcast i8* [[TMP15]] to <8 x i16>* +// CHECK-NEXT: store <8 x i16> [[TMP14]], <8 x i16>* [[TMP16]], align 1 +// CHECK-NEXT: ret void +// +void test2(vector unsigned short *c, unsigned short *ptr) { + *c = vec_xl(3ll, ptr); + vec_xst(*c, 7ll, ptr); +} + +// CHECK-LABEL: @test3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[__VEC_ADDR_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__PTR_ADDR_I2:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[__ADDR_I3:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca <4 x i32>*, align 8 +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: store <4 x i32>* [[C:%.*]], <4 x i32>** [[C_ADDR]], align 8 +// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-NEXT: store i32* [[TMP0]], i32** [[__PTR_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[__PTR_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to i8* +// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]] +// CHECK-NEXT: store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <4 x i32>* +// CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 1 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>*, <4 x i32>** [[C_ADDR]], align 8 +// CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>*, <4 x i32>** 
[[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 16 +// CHECK-NEXT: [[TMP10:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[__VEC_ADDR_I]], align 16 +// CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-NEXT: store i32* [[TMP10]], i32** [[__PTR_ADDR_I2]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load i32*, i32** [[__PTR_ADDR_I2]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to i8* +// CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-NEXT: [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]] +// CHECK-NEXT: store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* [[__VEC_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <4 x i32>* +// CHECK-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP16]], align 1 +// CHECK-NEXT: ret void +// +void test3(vector signed int *c, signed int *ptr) { + *c = vec_xl(3ll, ptr); + vec_xst(*c, 7ll, ptr); +} + +// CHECK-LABEL: @test4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[__VEC_ADDR_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__PTR_ADDR_I2:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[__ADDR_I3:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca <4 x i32>*, align 8 +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: store <4 x i32>* [[C:%.*]], <4 x i32>** [[C_ADDR]], align 8 +// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 +// 
CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-NEXT: store i32* [[TMP0]], i32** [[__PTR_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[__PTR_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to i8* +// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]] +// CHECK-NEXT: store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <4 x i32>* +// CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 1 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>*, <4 x i32>** [[C_ADDR]], align 8 +// CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>*, <4 x i32>** [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 16 +// CHECK-NEXT: [[TMP10:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[__VEC_ADDR_I]], align 16 +// CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-NEXT: store i32* [[TMP10]], i32** [[__PTR_ADDR_I2]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load i32*, i32** [[__PTR_ADDR_I2]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to i8* +// CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-NEXT: [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]] +// CHECK-NEXT: store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* [[__VEC_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <4 x i32>* +// CHECK-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP16]], align 1 +// CHECK-NEXT: ret void 
+// +void test4(vector unsigned int *c, unsigned int *ptr) { + *c = vec_xl(3ll, ptr); + vec_xst(*c, 7ll, ptr); +} + +// CHECK-LABEL: @test5( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[__VEC_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__PTR_ADDR_I2:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[__ADDR_I3:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca <2 x i64>*, align 8 +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: store <2 x i64>* [[C:%.*]], <2 x i64>** [[C_ADDR]], align 8 +// CHECK-NEXT: store i64* [[PTR:%.*]], i64** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-NEXT: store i64* [[TMP0]], i64** [[__PTR_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64*, i64** [[__PTR_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[TMP1]] to i8* +// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]] +// CHECK-NEXT: store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <2 x i64>* +// CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 1 +// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>*, <2 x i64>** [[C_ADDR]], align 8 +// CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>*, <2 x i64>** [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 16 +// CHECK-NEXT: [[TMP10:%.*]] = load i64*, i64** [[PTR_ADDR]], 
align 8 +// CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[__VEC_ADDR_I]], align 16 +// CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-NEXT: store i64* [[TMP10]], i64** [[__PTR_ADDR_I2]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load i64*, i64** [[__PTR_ADDR_I2]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = bitcast i64* [[TMP11]] to i8* +// CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-NEXT: [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]] +// CHECK-NEXT: store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[__VEC_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <2 x i64>* +// CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[TMP16]], align 1 +// CHECK-NEXT: ret void +// +void test5(vector signed long long *c, signed long long *ptr) { + *c = vec_xl(3ll, ptr); + vec_xst(*c, 7ll, ptr); +} + +// CHECK-LABEL: @test6( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[__VEC_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__PTR_ADDR_I2:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[__ADDR_I3:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca <2 x i64>*, align 8 +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: store <2 x i64>* [[C:%.*]], <2 x i64>** [[C_ADDR]], align 8 +// CHECK-NEXT: store i64* [[PTR:%.*]], i64** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-NEXT: store i64* [[TMP0]], i64** [[__PTR_ADDR_I]], align 8 +// CHECK-NEXT: 
[[TMP1:%.*]] = load i64*, i64** [[__PTR_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[TMP1]] to i8* +// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]] +// CHECK-NEXT: store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <2 x i64>* +// CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 1 +// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>*, <2 x i64>** [[C_ADDR]], align 8 +// CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>*, <2 x i64>** [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 16 +// CHECK-NEXT: [[TMP10:%.*]] = load i64*, i64** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[__VEC_ADDR_I]], align 16 +// CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-NEXT: store i64* [[TMP10]], i64** [[__PTR_ADDR_I2]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load i64*, i64** [[__PTR_ADDR_I2]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = bitcast i64* [[TMP11]] to i8* +// CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-NEXT: [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]] +// CHECK-NEXT: store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[__VEC_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <2 x i64>* +// CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[TMP16]], align 1 +// CHECK-NEXT: ret void +// +void test6(vector unsigned long long *c, unsigned long long *ptr) { + *c = vec_xl(3ll, ptr); + vec_xst(*c, 7ll, ptr); +} + +// 
CHECK-LABEL: @test7( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[__VEC_ADDR_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__PTR_ADDR_I2:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[__ADDR_I3:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca <4 x float>*, align 8 +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: store <4 x float>* [[C:%.*]], <4 x float>** [[C_ADDR]], align 8 +// CHECK-NEXT: store float* [[PTR:%.*]], float** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load float*, float** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-NEXT: store float* [[TMP0]], float** [[__PTR_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load float*, float** [[__PTR_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[TMP1]] to i8* +// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]] +// CHECK-NEXT: store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <4 x float>* +// CHECK-NEXT: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[TMP5]], align 1 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>*, <4 x float>** [[C_ADDR]], align 8 +// CHECK-NEXT: store <4 x float> [[TMP6]], <4 x float>* [[TMP7]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>*, <4 x float>** [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[TMP8]], align 16 +// CHECK-NEXT: [[TMP10:%.*]] = load float*, float** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store <4 x float> [[TMP9]], <4 x 
float>* [[__VEC_ADDR_I]], align 16 +// CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-NEXT: store float* [[TMP10]], float** [[__PTR_ADDR_I2]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load float*, float** [[__PTR_ADDR_I2]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = bitcast float* [[TMP11]] to i8* +// CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-NEXT: [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]] +// CHECK-NEXT: store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = load <4 x float>, <4 x float>* [[__VEC_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <4 x float>* +// CHECK-NEXT: store <4 x float> [[TMP14]], <4 x float>* [[TMP16]], align 1 +// CHECK-NEXT: ret void +// +void test7(vector float *c, float *ptr) { + *c = vec_xl(3ll, ptr); + vec_xst(*c, 7ll, ptr); +} + +// CHECK-LABEL: @test8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[__VEC_ADDR_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-NEXT: [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__PTR_ADDR_I2:%.*]] = alloca double*, align 8 +// CHECK-NEXT: [[__ADDR_I3:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca double*, align 8 +// CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca <2 x double>*, align 8 +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK-NEXT: store <2 x double>* [[C:%.*]], <2 x double>** [[C_ADDR]], align 8 +// CHECK-NEXT: store double* [[PTR:%.*]], double** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-NEXT: store double* [[TMP0]], double** [[__PTR_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load 
double*, double** [[__PTR_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to i8* +// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]] +// CHECK-NEXT: store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <2 x double>* +// CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 1 +// CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>*, <2 x double>** [[C_ADDR]], align 8 +// CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>*, <2 x double>** [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[TMP8]], align 16 +// CHECK-NEXT: [[TMP10:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[__VEC_ADDR_I]], align 16 +// CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-NEXT: store double* [[TMP10]], double** [[__PTR_ADDR_I2]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load double*, double** [[__PTR_ADDR_I2]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[TMP11]] to i8* +// CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-NEXT: [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]] +// CHECK-NEXT: store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = load <2 x double>, <2 x double>* [[__VEC_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <2 x double>* +// CHECK-NEXT: store <2 x double> [[TMP14]], <2 x double>* [[TMP16]], align 1 +// CHECK-NEXT: ret void +// +void test8(vector double *c, double *ptr) { + *c = vec_xl(3ll, ptr); + 
// Review note for this span: the first statement and closing brace below finish the preceding test function, whose start lies outside this span. The remainder, guarded by __POWER8_VECTOR__, adds test9 (vector signed __int128) and test10 (vector unsigned __int128). Each test loads with vec_xl(3ll, ptr) and stores back with vec_xst(*c, 7ll, ptr). The CHECK-P8 lines expect the element pointer to be bitcast to i8* and indexed by the raw offset (getelementptr inbounds i8 with the stored i64 3 / i64 7), i.e. the offset is applied in bytes, and expect the <1 x i128> load/store through that address to use align 1. NOTE(review): the CHECK lines mirror the exact unoptimized IR and look tool-generated; presumably they should be regenerated rather than hand-edited if the test bodies change -- confirm.
vec_xst(*c, 7ll, ptr); +} + +#ifdef __POWER8_VECTOR__ +// CHECK-P8-LABEL: @test9( +// CHECK-P8-NEXT: entry: +// CHECK-P8-NEXT: [[__VEC_ADDR_I:%.*]] = alloca <1 x i128>, align 16 +// CHECK-P8-NEXT: [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8 +// CHECK-P8-NEXT: [[__PTR_ADDR_I2:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: [[__ADDR_I3:%.*]] = alloca i8*, align 8 +// CHECK-P8-NEXT: [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-P8-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-P8-NEXT: [[C_ADDR:%.*]] = alloca <1 x i128>*, align 8 +// CHECK-P8-NEXT: [[PTR_ADDR:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: store <1 x i128>* [[C:%.*]], <1 x i128>** [[C_ADDR]], align 8 +// CHECK-P8-NEXT: store i128* [[PTR:%.*]], i128** [[PTR_ADDR]], align 8 +// CHECK-P8-NEXT: [[TMP0:%.*]] = load i128*, i128** [[PTR_ADDR]], align 8 +// CHECK-P8-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-P8-NEXT: store i128* [[TMP0]], i128** [[__PTR_ADDR_I]], align 8 +// CHECK-P8-NEXT: [[TMP1:%.*]] = load i128*, i128** [[__PTR_ADDR_I]], align 8 +// CHECK-P8-NEXT: [[TMP2:%.*]] = bitcast i128* [[TMP1]] to i8* +// CHECK-P8-NEXT: [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-P8-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]] +// CHECK-P8-NEXT: store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8 +// CHECK-P8-NEXT: [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8 +// CHECK-P8-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <1 x i128>* +// CHECK-P8-NEXT: [[TMP6:%.*]] = load <1 x i128>, <1 x i128>* [[TMP5]], align 1 +// CHECK-P8-NEXT: [[TMP7:%.*]] = load <1 x i128>*, <1 x i128>** [[C_ADDR]], align 8 +// CHECK-P8-NEXT: store <1 x i128> [[TMP6]], <1 x i128>* [[TMP7]], align 16 +// CHECK-P8-NEXT: [[TMP8:%.*]] = load <1 x i128>*, <1 x i128>** [[C_ADDR]], align 8 +// CHECK-P8-NEXT: [[TMP9:%.*]] = load <1 x i128>, <1 x i128>* [[TMP8]], align 16 +// 
CHECK-P8-NEXT: [[TMP10:%.*]] = load i128*, i128** [[PTR_ADDR]], align 8 +// CHECK-P8-NEXT: store <1 x i128> [[TMP9]], <1 x i128>* [[__VEC_ADDR_I]], align 16 +// CHECK-P8-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-P8-NEXT: store i128* [[TMP10]], i128** [[__PTR_ADDR_I2]], align 8 +// CHECK-P8-NEXT: [[TMP11:%.*]] = load i128*, i128** [[__PTR_ADDR_I2]], align 8 +// CHECK-P8-NEXT: [[TMP12:%.*]] = bitcast i128* [[TMP11]] to i8* +// CHECK-P8-NEXT: [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-P8-NEXT: [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]] +// CHECK-P8-NEXT: store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8 +// CHECK-P8-NEXT: [[TMP14:%.*]] = load <1 x i128>, <1 x i128>* [[__VEC_ADDR_I]], align 16 +// CHECK-P8-NEXT: [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8 +// CHECK-P8-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <1 x i128>* +// CHECK-P8-NEXT: store <1 x i128> [[TMP14]], <1 x i128>* [[TMP16]], align 1 +// CHECK-P8-NEXT: ret void +// +void test9(vector signed __int128 *c, signed __int128 *ptr) { + *c = vec_xl(3ll, ptr); + vec_xst(*c, 7ll, ptr); +} + +// CHECK-P8-LABEL: @test10( +// CHECK-P8-NEXT: entry: +// CHECK-P8-NEXT: [[__VEC_ADDR_I:%.*]] = alloca <1 x i128>, align 16 +// CHECK-P8-NEXT: [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8 +// CHECK-P8-NEXT: [[__PTR_ADDR_I2:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: [[__ADDR_I3:%.*]] = alloca i8*, align 8 +// CHECK-P8-NEXT: [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-P8-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-P8-NEXT: [[C_ADDR:%.*]] = alloca <1 x i128>*, align 8 +// CHECK-P8-NEXT: [[PTR_ADDR:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: store <1 x i128>* [[C:%.*]], <1 x i128>** [[C_ADDR]], align 8 +// CHECK-P8-NEXT: store i128* [[PTR:%.*]], i128** [[PTR_ADDR]], align 8 +// CHECK-P8-NEXT: [[TMP0:%.*]] = load i128*, i128** 
[[PTR_ADDR]], align 8 +// CHECK-P8-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-P8-NEXT: store i128* [[TMP0]], i128** [[__PTR_ADDR_I]], align 8 +// CHECK-P8-NEXT: [[TMP1:%.*]] = load i128*, i128** [[__PTR_ADDR_I]], align 8 +// CHECK-P8-NEXT: [[TMP2:%.*]] = bitcast i128* [[TMP1]] to i8* +// CHECK-P8-NEXT: [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8 +// CHECK-P8-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]] +// CHECK-P8-NEXT: store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8 +// CHECK-P8-NEXT: [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8 +// CHECK-P8-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <1 x i128>* +// CHECK-P8-NEXT: [[TMP6:%.*]] = load <1 x i128>, <1 x i128>* [[TMP5]], align 1 +// CHECK-P8-NEXT: [[TMP7:%.*]] = load <1 x i128>*, <1 x i128>** [[C_ADDR]], align 8 +// CHECK-P8-NEXT: store <1 x i128> [[TMP6]], <1 x i128>* [[TMP7]], align 16 +// CHECK-P8-NEXT: [[TMP8:%.*]] = load <1 x i128>*, <1 x i128>** [[C_ADDR]], align 8 +// CHECK-P8-NEXT: [[TMP9:%.*]] = load <1 x i128>, <1 x i128>* [[TMP8]], align 16 +// CHECK-P8-NEXT: [[TMP10:%.*]] = load i128*, i128** [[PTR_ADDR]], align 8 +// CHECK-P8-NEXT: store <1 x i128> [[TMP9]], <1 x i128>* [[__VEC_ADDR_I]], align 16 +// CHECK-P8-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-P8-NEXT: store i128* [[TMP10]], i128** [[__PTR_ADDR_I2]], align 8 +// CHECK-P8-NEXT: [[TMP11:%.*]] = load i128*, i128** [[__PTR_ADDR_I2]], align 8 +// CHECK-P8-NEXT: [[TMP12:%.*]] = bitcast i128* [[TMP11]] to i8* +// CHECK-P8-NEXT: [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8 +// CHECK-P8-NEXT: [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]] +// CHECK-P8-NEXT: store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8 +// CHECK-P8-NEXT: [[TMP14:%.*]] = load <1 x i128>, <1 x i128>* [[__VEC_ADDR_I]], align 16 +// CHECK-P8-NEXT: [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8 +// CHECK-P8-NEXT: [[TMP16:%.*]] = 
bitcast i8* [[TMP15]] to <1 x i128>* +// CHECK-P8-NEXT: store <1 x i128> [[TMP14]], <1 x i128>* [[TMP16]], align 1 +// CHECK-P8-NEXT: ret void +// +void test10(vector unsigned __int128 *c, unsigned __int128 *ptr) { + *c = vec_xl(3ll, ptr); + vec_xst(*c, 7ll, ptr); +} +#endif