Index: lib/Headers/altivec.h =================================================================== --- lib/Headers/altivec.h +++ lib/Headers/altivec.h @@ -9492,49 +9492,51 @@ /* vec_sr */ -static __inline__ vector signed char __ATTRS_o_ai -vec_sr(vector signed char __a, vector unsigned char __b) { - vector unsigned char __res = (vector unsigned char)__a >> __b; - return (vector signed char)__res; -} - +// vec_sr does modulo arithmetic on __b first, so __b is allowed to be more +// than the length of __a. static __inline__ vector unsigned char __ATTRS_o_ai vec_sr(vector unsigned char __a, vector unsigned char __b) { - return __a >> __b; + return __a >> + (__b % (vector unsigned char)(sizeof(unsigned char) * __CHAR_BIT__)); } -static __inline__ vector signed short __ATTRS_o_ai -vec_sr(vector signed short __a, vector unsigned short __b) { - vector unsigned short __res = (vector unsigned short)__a >> __b; - return (vector signed short)__res; +static __inline__ vector signed char __ATTRS_o_ai +vec_sr(vector signed char __a, vector unsigned char __b) { + return (vector signed char)vec_sr((vector unsigned char)__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_sr(vector unsigned short __a, vector unsigned short __b) { - return __a >> __b; + return __a >> + (__b % (vector unsigned short)(sizeof(unsigned short) * __CHAR_BIT__)); } -static __inline__ vector signed int __ATTRS_o_ai -vec_sr(vector signed int __a, vector unsigned int __b) { - vector unsigned int __res = (vector unsigned int)__a >> __b; - return (vector signed int)__res; +static __inline__ vector short __ATTRS_o_ai vec_sr(vector short __a, + vector unsigned short __b) { + return (vector short)vec_sr((vector unsigned short)__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_sr(vector unsigned int __a, vector unsigned int __b) { - return __a >> __b; + return __a >> + (__b % (vector unsigned int)(sizeof(unsigned int) * __CHAR_BIT__)); } -#ifdef __POWER8_VECTOR__ -static __inline__ vector signed long long __ATTRS_o_ai -vec_sr(vector signed long long __a, vector unsigned long long __b) { - vector unsigned long long __res = (vector unsigned long long)__a >> __b; - return (vector signed long long)__res; +static __inline__ vector int __ATTRS_o_ai vec_sr(vector int __a, + vector unsigned int __b) { + return (vector int)vec_sr((vector unsigned int)__a, __b); } +#ifdef __POWER8_VECTOR__ static __inline__ vector unsigned long long __ATTRS_o_ai vec_sr(vector unsigned long long __a, vector unsigned long long __b) { - return __a >> __b; + return __a >> (__b % (vector unsigned long long)(sizeof(unsigned long long) * + __CHAR_BIT__)); +} + +static __inline__ vector long long __ATTRS_o_ai +vec_sr(vector long long __a, vector unsigned long long __b) { + return (vector long long)vec_sr((vector unsigned long long)__a, __b); } #endif @@ -9544,12 +9546,12 @@ static __inline__ vector signed char __ATTRS_o_ai vec_vsrb(vector signed char __a, vector unsigned char __b) { - return __a >> (vector signed char)__b; + return vec_sr(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsrb(vector unsigned char __a, vector unsigned char __b) { - return __a >> __b; + return vec_sr(__a, __b); } /* vec_vsrh */ @@ -9558,12 +9560,12 @@ static __inline__ vector short __ATTRS_o_ai vec_vsrh(vector short __a, vector unsigned short __b) { - return __a >> (vector short)__b; + return vec_sr(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsrh(vector unsigned short __a, vector unsigned short __b) { - return __a >> __b; + return vec_sr(__a, __b); } /* vec_vsrw */ @@ -9572,12 +9574,12 @@ static __inline__ vector int __ATTRS_o_ai vec_vsrw(vector int __a, vector unsigned int __b) { - return __a >> (vector int)__b; + return vec_sr(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsrw(vector unsigned int __a, vector unsigned int __b) { - return __a >> __b; + return vec_sr(__a, __b); } /* vec_sra */ Index: test/CodeGen/builtins-ppc-altivec.c =================================================================== --- test/CodeGen/builtins-ppc-altivec.c +++ test/CodeGen/builtins-ppc-altivec.c @@ -4256,52 +4256,76 @@ /* vec_sr */ res_vsc = vec_sr(vsc, vuc); -// CHECK: lshr <16 x i8> -// CHECK-LE: lshr <16 x i8> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vuc = vec_sr(vuc, vuc); -// CHECK: lshr <16 x i8> -// CHECK-LE: lshr <16 x i8> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vs = vec_sr(vs, vus); -// CHECK: lshr <8 x i16> -// CHECK-LE: lshr <8 x i16> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vus = vec_sr(vus, vus); -// CHECK: lshr <8 x i16> -// CHECK-LE: lshr <8 x i16> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vi = vec_sr(vi, vui); -// CHECK: lshr <4 x i32> -// CHECK-LE: lshr <4 x i32> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vui = vec_sr(vui, vui); -// CHECK: lshr <4 x i32> -// CHECK-LE: lshr <4 x i32> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vsc = vec_vsrb(vsc, vuc); -// CHECK: shr <16 x i8> -// CHECK-LE: shr <16 x i8> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vuc = vec_vsrb(vuc, vuc); -// CHECK: shr <16 x i8> -// CHECK-LE: shr <16 x i8> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vs = vec_vsrh(vs, vus); -// CHECK: shr <8 x i16> -// CHECK-LE: shr <8 x i16> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vus = vec_vsrh(vus, vus); -// CHECK: shr <8 x i16> -// CHECK-LE: shr <8 x i16> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vi = vec_vsrw(vi, vui); -// CHECK: shr <4 x i32> -// CHECK-LE: shr <4 x i32> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vui = vec_vsrw(vui, vui); -// CHECK: shr <4 x i32> -// CHECK-LE: shr <4 x i32> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] /* vec_sra */ res_vsc = vec_sra(vsc, vuc); Index: test/CodeGen/builtins-ppc-p8vector.c =================================================================== --- test/CodeGen/builtins-ppc-p8vector.c +++ test/CodeGen/builtins-ppc-p8vector.c @@ -1066,13 +1066,17 @@ /* vec_sr */ res_vsll = vec_sr(vsll, vull); -// CHECK: lshr <2 x i64> -// CHECK-LE: lshr <2 x i64> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, +// CHECK: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]] // CHECK-PPC: error: call to 'vec_sr' is ambiguous res_vull = vec_sr(vull, vull); -// CHECK: lshr <2 x i64> -// CHECK-LE: lshr <2 x i64> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, +// CHECK: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]] // CHECK-PPC: error: call to 'vec_sr' is ambiguous /* vec_sra */