Index: include/clang/Basic/BuiltinsPPC.def =================================================================== --- include/clang/Basic/BuiltinsPPC.def +++ include/clang/Basic/BuiltinsPPC.def @@ -134,6 +134,14 @@ BUILTIN(__builtin_altivec_vcmpequd, "V2LLiV2LLiV2LLi", "") BUILTIN(__builtin_altivec_vcmpeqfp, "V4iV4fV4f", "") +BUILTIN(__builtin_altivec_vcmpneb, "V16cV16cV16c", "") +BUILTIN(__builtin_altivec_vcmpneh, "V8sV8sV8s", "") +BUILTIN(__builtin_altivec_vcmpnew, "V4iV4iV4i", "") + +BUILTIN(__builtin_altivec_vcmpnezb, "V16cV16cV16c", "") +BUILTIN(__builtin_altivec_vcmpnezh, "V8sV8sV8s", "") +BUILTIN(__builtin_altivec_vcmpnezw, "V4iV4iV4i", "") + BUILTIN(__builtin_altivec_vcmpgtsb, "V16cV16ScV16Sc", "") BUILTIN(__builtin_altivec_vcmpgtub, "V16cV16UcV16Uc", "") BUILTIN(__builtin_altivec_vcmpgtsh, "V8sV8SsV8Ss", "") @@ -223,6 +231,11 @@ BUILTIN(__builtin_altivec_vcmpequd_p, "iiV2LLiV2LLi", "") BUILTIN(__builtin_altivec_vcmpeqfp_p, "iiV4fV4f", "") +BUILTIN(__builtin_altivec_vcmpneb_p, "iiV16cV16c", "") +BUILTIN(__builtin_altivec_vcmpneh_p, "iiV8sV8s", "") +BUILTIN(__builtin_altivec_vcmpnew_p, "iiV4iV4i", "") +BUILTIN(__builtin_altivec_vcmpned_p, "iiV2LLiV2LLi", "") + BUILTIN(__builtin_altivec_vcmpgtsb_p, "iiV16ScV16Sc", "") BUILTIN(__builtin_altivec_vcmpgtub_p, "iiV16UcV16Uc", "") BUILTIN(__builtin_altivec_vcmpgtsh_p, "iiV8SsV8Ss", "") @@ -254,6 +267,16 @@ BUILTIN(__builtin_altivec_vclzh, "V8UsV8Us", "") BUILTIN(__builtin_altivec_vclzw, "V4UiV4Ui", "") BUILTIN(__builtin_altivec_vclzd, "V2ULLiV2ULLi", "") +BUILTIN(__builtin_altivec_vctzb, "V16UcV16Uc", "") +BUILTIN(__builtin_altivec_vctzh, "V8UsV8Us", "") +BUILTIN(__builtin_altivec_vctzw, "V4UiV4Ui", "") +BUILTIN(__builtin_altivec_vctzd, "V2ULLiV2ULLi", "") + +// Vector population count built-ins +BUILTIN(__builtin_altivec_vpopcntb, "V16UcV16Uc", "") +BUILTIN(__builtin_altivec_vpopcnth, "V8UsV8Us", "") +BUILTIN(__builtin_altivec_vpopcntw, "V4UiV4Ui", "") +BUILTIN(__builtin_altivec_vpopcntd, "V2ULLiV2ULLi", "") // VSX 
built-ins. Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -1564,6 +1564,10 @@ Group; def mno_power8_vector : Flag<["-"], "mno-power8-vector">, Group; +def mpower9_vector : Flag<["-"], "mpower9-vector">, + Group; +def mno_power9_vector : Flag<["-"], "mno-power9-vector">, + Group; def mpower8_crypto : Flag<["-"], "mcrypto">, Group; def mnopower8_crypto : Flag<["-"], "mno-crypto">, Index: lib/Basic/Targets.cpp =================================================================== --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -870,6 +870,7 @@ bool HasHTM; bool HasBPERMD; bool HasExtDiv; + bool HasP9Vector; protected: std::string ABI; @@ -878,7 +879,7 @@ PPCTargetInfo(const llvm::Triple &Triple, const TargetOptions &) : TargetInfo(Triple), HasVSX(false), HasP8Vector(false), HasP8Crypto(false), HasDirectMove(false), HasQPX(false), HasHTM(false), - HasBPERMD(false), HasExtDiv(false) { + HasBPERMD(false), HasExtDiv(false), HasP9Vector(false) { BigEndian = (Triple.getArch() != llvm::Triple::ppc64le); SimdDefaultAlign = 128; LongDoubleWidth = LongDoubleAlign = 128; @@ -1158,6 +1159,8 @@ HasHTM = true; } else if (Feature == "+float128") { HasFloat128 = true; + } else if (Feature == "+power9-vector") { + HasP9Vector = true; } // TODO: Finish this list and add an assert that we've handled them // all. 
@@ -1327,6 +1330,8 @@ Builder.defineMacro("__HTM__"); if (HasFloat128) Builder.defineMacro("__FLOAT128__"); + if (HasP9Vector) + Builder.defineMacro("__POWER9_VECTOR__"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); @@ -1384,6 +1389,13 @@ << "-mno-vsx"; return false; } + + if (std::find(FeaturesVec.begin(), FeaturesVec.end(), "+power9-vector") != + FeaturesVec.end()) { + Diags.Report(diag::err_opt_not_valid_with_opt) << "-mpower9-vector" + << "-mno-vsx"; + return false; + } } return true; @@ -1408,6 +1420,7 @@ .Default(false); Features["qpx"] = (CPU == "a2q"); + Features["power9-vector"] = (CPU == "pwr9"); Features["crypto"] = llvm::StringSwitch(CPU) .Case("ppc64le", true) .Case("pwr9", true) @@ -1460,6 +1473,7 @@ .Case("bpermd", HasBPERMD) .Case("extdiv", HasExtDiv) .Case("float128", HasFloat128) + .Case("power9-vector", HasP9Vector) .Default(false); } @@ -1469,19 +1483,21 @@ // as well. Do the inverse if we're disabling vsx. We'll diagnose any user // incompatible options. if (Enabled) { - if (Name == "direct-move") { - Features[Name] = Features["vsx"] = true; - } else if (Name == "power8-vector") { - Features[Name] = Features["vsx"] = true; - } else if (Name == "float128") { + if (Name == "direct-move" || + Name == "power8-vector" || + Name == "float128" || + Name == "power9-vector") { + // power9-vector is really a superset of power8-vector so encode that. 
Features[Name] = Features["vsx"] = true; + if (Name == "power9-vector") + Features["power8-vector"] = true; } else { Features[Name] = true; } } else { if (Name == "vsx") { Features[Name] = Features["direct-move"] = Features["power8-vector"] = - Features["float128"] = false; + Features["float128"] = Features["power9-vector"] = false; } else { Features[Name] = false; } Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -7521,6 +7521,25 @@ Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); return Builder.CreateCall(F, {X, Undef}); } + case PPC::BI__builtin_altivec_vctzb: + case PPC::BI__builtin_altivec_vctzh: + case PPC::BI__builtin_altivec_vctzw: + case PPC::BI__builtin_altivec_vctzd: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); + Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); + return Builder.CreateCall(F, {X, Undef}); + } + case PPC::BI__builtin_altivec_vpopcntb: + case PPC::BI__builtin_altivec_vpopcnth: + case PPC::BI__builtin_altivec_vpopcntw: + case PPC::BI__builtin_altivec_vpopcntd: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); + return Builder.CreateCall(F, X); + } // Copy sign case PPC::BI__builtin_vsx_xvcpsgnsp: case PPC::BI__builtin_vsx_xvcpsgndp: { Index: lib/Headers/altivec.h =================================================================== --- lib/Headers/altivec.h +++ lib/Headers/altivec.h @@ -1595,6 +1595,133 @@ } #endif +#ifdef __POWER9_VECTOR__ +/* vec_cmpne */ + +static __inline__ vector bool char __ATTRS_o_ai +vec_cmpne(vector bool char __a, vector bool char __b) { + return (vector bool char)__builtin_altivec_vcmpneb((vector char)__a, + (vector char)__b); +} + +static 
__inline__ vector bool char __ATTRS_o_ai +vec_cmpne(vector signed char __a, vector signed char __b) { + return (vector bool char)__builtin_altivec_vcmpneb((vector char)__a, + (vector char)__b); +} + +static __inline__ vector bool char __ATTRS_o_ai +vec_cmpne(vector unsigned char __a, vector unsigned char __b) { + return (vector bool char)__builtin_altivec_vcmpneb((vector char)__a, + (vector char)__b); +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_cmpne(vector bool short __a, vector bool short __b) { + return (vector bool short)__builtin_altivec_vcmpneh((vector short)__a, + (vector short)__b); +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_cmpne(vector signed short __a, vector signed short __b) { + return (vector bool short)__builtin_altivec_vcmpneh((vector short)__a, + (vector short)__b); +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_cmpne(vector unsigned short __a, vector unsigned short __b) { + return (vector bool short)__builtin_altivec_vcmpneh((vector short)__a, + (vector short)__b); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector bool int __a, vector bool int __b) { + return (vector bool int)__builtin_altivec_vcmpnew((vector int)__a, + (vector int)__b); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector signed int __a, vector signed int __b) { + return (vector bool int)__builtin_altivec_vcmpnew((vector int)__a, + (vector int)__b); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector unsigned int __a, vector unsigned int __b) { + return (vector bool int)__builtin_altivec_vcmpnew((vector int)__a, + (vector int)__b); +} + +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector bool long long __a, vector bool long long __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} + +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector signed long long __a, vector signed 
long long __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} + +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector unsigned long long __a, vector unsigned long long __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector float __a, vector float __b) { + return (vector bool int)__builtin_altivec_vcmpnew((vector int)__a, + (vector int)__b); +} + +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector double __a, vector double __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} + +/* vec_cmpnez */ + +static __inline__ vector bool char __ATTRS_o_ai +vec_cmpnez(vector signed char __a, vector signed char __b) { + return (vector bool char)__builtin_altivec_vcmpnezb((vector char)__a, + (vector char)__b); +} + +static __inline__ vector bool char __ATTRS_o_ai +vec_cmpnez(vector unsigned char __a, vector unsigned char __b) { + return (vector bool char)__builtin_altivec_vcmpnezb((vector char)__a, + (vector char)__b); +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_cmpnez(vector signed short __a, vector signed short __b) { + return (vector bool short)__builtin_altivec_vcmpnezh((vector short)__a, + (vector short)__b); +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_cmpnez(vector unsigned short __a, vector unsigned short __b) { + return (vector bool short)__builtin_altivec_vcmpnezh((vector short)__a, + (vector short)__b); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpnez(vector signed int __a, vector signed int __b) { + return (vector bool int)__builtin_altivec_vcmpnezw((vector int)__a, + (vector int)__b); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpnez(vector unsigned int __a, vector unsigned int __b) { + return 
(vector bool int)__builtin_altivec_vcmpnezw((vector int)__a, + (vector int)__b); +} + +#endif + /* vec_cmpgt */ static __inline__ vector bool char __ATTRS_o_ai @@ -1882,6 +2009,41 @@ return vec_cmpgt(__b, __a); } +/* vec_popcnt */ + +static __inline__ vector signed char __ATTRS_o_ai +vec_popcnt(vector signed char __a) { + return __builtin_altivec_vpopcntb(__a); +} +static __inline__ vector unsigned char __ATTRS_o_ai +vec_popcnt(vector unsigned char __a) { + return __builtin_altivec_vpopcntb(__a); +} +static __inline__ vector signed short __ATTRS_o_ai +vec_popcnt(vector signed short __a) { + return __builtin_altivec_vpopcnth(__a); +} +static __inline__ vector unsigned short __ATTRS_o_ai +vec_popcnt(vector unsigned short __a) { + return __builtin_altivec_vpopcnth(__a); +} +static __inline__ vector signed int __ATTRS_o_ai +vec_popcnt(vector signed int __a) { + return __builtin_altivec_vpopcntw(__a); +} +static __inline__ vector unsigned int __ATTRS_o_ai +vec_popcnt(vector unsigned int __a) { + return __builtin_altivec_vpopcntw(__a); +} +static __inline__ vector signed long long __ATTRS_o_ai +vec_popcnt(vector signed long long __a) { + return __builtin_altivec_vpopcntd(__a); +} +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_popcnt(vector unsigned long long __a) { + return __builtin_altivec_vpopcntd(__a); +} + /* vec_cntlz */ static __inline__ vector signed char __ATTRS_o_ai @@ -1918,6 +2080,425 @@ } #endif +#ifdef __POWER9_VECTOR__ + +/* vec_cnttz */ + +static __inline__ vector signed char __ATTRS_o_ai +vec_cnttz(vector signed char __a) { + return __builtin_altivec_vctzb(__a); +} +static __inline__ vector unsigned char __ATTRS_o_ai +vec_cnttz(vector unsigned char __a) { + return __builtin_altivec_vctzb(__a); +} +static __inline__ vector signed short __ATTRS_o_ai +vec_cnttz(vector signed short __a) { + return __builtin_altivec_vctzh(__a); +} +static __inline__ vector unsigned short __ATTRS_o_ai +vec_cnttz(vector unsigned short __a) { + return 
__builtin_altivec_vctzh(__a); +} +static __inline__ vector signed int __ATTRS_o_ai +vec_cnttz(vector signed int __a) { + return __builtin_altivec_vctzw(__a); +} +static __inline__ vector unsigned int __ATTRS_o_ai +vec_cnttz(vector unsigned int __a) { + return __builtin_altivec_vctzw(__a); +} +static __inline__ vector signed long long __ATTRS_o_ai +vec_cnttz(vector signed long long __a) { + return __builtin_altivec_vctzd(__a); +} +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_cnttz(vector unsigned long long __a) { + return __builtin_altivec_vctzd(__a); +} + +/* vec_first_match_index */ + +static __inline__ unsigned __ATTRS_o_ai +vec_first_match_index(vector signed char __a, vector signed char __b) { + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpeq(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpeq(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 3; + } + return __res[0] >> 3; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_match_index(vector unsigned char __a, vector unsigned char __b) { + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpeq(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpeq(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 3; + } + return __res[0] >> 3; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_match_index(vector signed short __a, vector signed short __b) { + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpeq(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpeq(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 4; + } + return __res[0] >> 4; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_match_index(vector unsigned short __a, vector unsigned short __b) { + vector unsigned long long __res = +#ifdef 
__LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpeq(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpeq(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 4; + } + return __res[0] >> 4; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_match_index(vector signed int __a, vector signed int __b) { + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpeq(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpeq(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 5; + } + return __res[0] >> 5; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_match_index(vector unsigned int __a, vector unsigned int __b) { + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpeq(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpeq(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 5; + } + return __res[0] >> 5; +} + +/* vec_first_match_or_eos_index */ + +static __inline__ unsigned __ATTRS_o_ai +vec_first_match_or_eos_index(vector signed char __a, vector signed char __b) { + /* Compare the two vectors, then compare that result with each input and OR + all three together. An element is set where the inputs are equal, or + where either input is zero (a zero mismatch result equals a zero input). 
+ */ + vector bool char __tmp1 = vec_cmpeq(__a, __b); + vector bool char __tmp2 = __tmp1 | vec_cmpeq(__tmp1, __a) | + vec_cmpeq(__tmp1, __b); + + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)__tmp2); +#else + vec_cntlz((vector unsigned long long)__tmp2); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 3; + } + return __res[0] >> 3; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_match_or_eos_index(vector unsigned char __a, + vector unsigned char __b) { + vector bool char __tmp1 = vec_cmpeq(__a, __b); + vector bool char __tmp2 = __tmp1 | vec_cmpeq(__tmp1, __a) | + vec_cmpeq(__tmp1, __b); + + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)__tmp2); +#else + vec_cntlz((vector unsigned long long)__tmp2); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 3; + } + return __res[0] >> 3; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_match_or_eos_index(vector signed short __a, vector signed short __b) { + vector bool short __tmp1 = vec_cmpeq(__a, __b); + vector bool short __tmp2 = __tmp1 | vec_cmpeq(__tmp1, __a) | + vec_cmpeq(__tmp1, __b); + + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)__tmp2); +#else + vec_cntlz((vector unsigned long long)__tmp2); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 4; + } + return __res[0] >> 4; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_match_or_eos_index(vector unsigned short __a, + vector unsigned short __b) { + vector bool short __tmp1 = vec_cmpeq(__a, __b); + vector bool short __tmp2 = __tmp1 | vec_cmpeq(__tmp1, __a) | + vec_cmpeq(__tmp1, __b); + + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)__tmp2); +#else + vec_cntlz((vector unsigned long long)__tmp2); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 4; + } + return __res[0] >> 4; +} + +static 
__inline__ unsigned __ATTRS_o_ai +vec_first_match_or_eos_index(vector signed int __a, vector signed int __b) { + vector bool int __tmp1 = vec_cmpeq(__a, __b); + vector bool int __tmp2 = __tmp1 | vec_cmpeq(__tmp1, __a) | + vec_cmpeq(__tmp1, __b); + + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)__tmp2); +#else + vec_cntlz((vector unsigned long long)__tmp2); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 5; + } + return __res[0] >> 5; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_match_or_eos_index(vector unsigned int __a, + vector unsigned int __b) { + vector bool int __tmp1 = vec_cmpeq(__a, __b); + vector bool int __tmp2 = __tmp1 | vec_cmpeq(__tmp1, __a) | + vec_cmpeq(__tmp1, __b); + + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)__tmp2); +#else + vec_cntlz((vector unsigned long long)__tmp2); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 5; + } + return __res[0] >> 5; +} + +/* vec_first_mismatch_index */ + +static __inline__ unsigned __ATTRS_o_ai +vec_first_mismatch_index(vector signed char __a, vector signed char __b) { + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpne(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpne(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 3; + } + return __res[0] >> 3; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_mismatch_index(vector unsigned char __a, vector unsigned char __b) { + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpne(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpne(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 3; + } + return __res[0] >> 3; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_mismatch_index(vector signed short __a, vector signed short __b) { + 
vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpne(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpne(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 4; + } + return __res[0] >> 4; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_mismatch_index(vector unsigned short __a, vector unsigned short __b) { + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpne(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpne(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 4; + } + return __res[0] >> 4; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_mismatch_index(vector signed int __a, vector signed int __b) { + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpne(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpne(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 5; + } + return __res[0] >> 5; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_mismatch_index(vector unsigned int __a, vector unsigned int __b) { + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpne(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpne(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 5; + } + return __res[0] >> 5; +} + +/* vec_first_mismatch_or_eos_index */ + +static __inline__ unsigned __ATTRS_o_ai +vec_first_mismatch_or_eos_index(vector signed char __a, + vector signed char __b) { + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpnez(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpnez(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 3; + } + return __res[0] >> 3; +} + +static __inline__ unsigned __ATTRS_o_ai 
+vec_first_mismatch_or_eos_index(vector unsigned char __a, + vector unsigned char __b) { + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpnez(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpnez(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 3; + } + return __res[0] >> 3; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_mismatch_or_eos_index(vector signed short __a, + vector signed short __b) { + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpnez(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpnez(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 4; + } + return __res[0] >> 4; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_mismatch_or_eos_index(vector unsigned short __a, + vector unsigned short __b) { + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpnez(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpnez(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 4; + } + return __res[0] >> 4; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_mismatch_or_eos_index(vector signed int __a, vector signed int __b) { + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpnez(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpnez(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] + 64) >> 5; + } + return __res[0] >> 5; +} + +static __inline__ unsigned __ATTRS_o_ai +vec_first_mismatch_or_eos_index(vector unsigned int __a, + vector unsigned int __b) { + vector unsigned long long __res = +#ifdef __LITTLE_ENDIAN__ + vec_cnttz((vector unsigned long long)vec_cmpnez(__a, __b)); +#else + vec_cntlz((vector unsigned long long)vec_cmpnez(__a, __b)); +#endif + if (__res[0] == 64) { + return (__res[1] 
+ 64) >> 5; + } + return __res[0] >> 5; +} + +#endif + /* vec_cpsgn */ #ifdef __VSX__ Index: test/CodeGen/builtins-ppc-p9vector.c =================================================================== --- test/CodeGen/builtins-ppc-p9vector.c +++ test/CodeGen/builtins-ppc-p9vector.c @@ -0,0 +1,748 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -faltivec -target-feature +power9-vector \ +// RUN: -triple powerpc64-unknown-unknown -emit-llvm %s \ +// RUN: -O2 -o - | FileCheck %s -check-prefix=CHECK-BE + +// RUN: %clang_cc1 -faltivec -target-feature +power9-vector \ +// RUN: -triple powerpc64le-unknown-unknown -emit-llvm %s \ +// RUN: -O2 -o - | FileCheck %s + +#include + +vector signed char vsca, vscb; +vector unsigned char vuca, vucb; +vector bool char vbca, vbcb; +vector signed short vssa, vssb; +vector unsigned short vusa, vusb; +vector bool short vbsa, vbsb; +vector signed int vsia, vsib; +vector unsigned int vuia, vuib; +vector bool int vbia, vbib; +vector signed long long vsla, vslb; +vector unsigned long long vula, vulb; +vector bool long long vbla, vblb; +vector float vfa, vfb; +vector double vda, vdb; + +unsigned test1(void) { +// CHECK-BE: @llvm.ppc.altivec.vcmpequb(<16 x i8> +// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64> +// CHECK-BE: extractelement <2 x i64> +// CHECK-BE: icmp eq i64 {{.*}}, 64 +// CHECK-BE: extractelement <2 x i64> +// CHECK-BE: add i64 {{.*}}, 64 +// CHECK-BE: select i1 +// CHECK-BE: lshr i64 {{.*}}, 3 +// CHECK: @llvm.ppc.altivec.vcmpequb(<16 x i8> +// CHECK: @llvm.cttz.v2i64(<2 x i64> +// CHECK: extractelement <2 x i64> +// CHECK: icmp eq i64 {{.*}}, 64 +// CHECK: extractelement <2 x i64> +// CHECK: add i64 {{.*}}, 64 +// CHECK: select i1 +// CHECK: lshr i64 {{.*}}, 3 + return vec_first_match_index (vsca, vscb); +} +unsigned test2(void) { +// CHECK-BE: @llvm.ppc.altivec.vcmpequb(<16 x i8> +// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64> +// CHECK-BE: extractelement <2 x i64> +// CHECK-BE: icmp eq i64 {{.*}}, 64 +// CHECK-BE: 
extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 3
+// CHECK: @llvm.ppc.altivec.vcmpequb(<16 x i8>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 3
+  return vec_first_match_index (vuca, vucb);
+}
+unsigned test3(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpequw(<4 x i32>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 5
+// CHECK: @llvm.ppc.altivec.vcmpequw(<4 x i32>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 5
+  return vec_first_match_index (vsia, vsib);
+}
+unsigned test4(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpequw(<4 x i32>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 5
+// CHECK: @llvm.ppc.altivec.vcmpequw(<4 x i32>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 5
+  return vec_first_match_index (vuia, vuib);
+}
+unsigned test5(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpequh(<8 x i16>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 4
+// CHECK: @llvm.ppc.altivec.vcmpequh(<8 x i16>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 4
+  return vec_first_match_index (vssa, vssb);
+}
+unsigned test6(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpequh(<8 x i16>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 4
+// CHECK: @llvm.ppc.altivec.vcmpequh(<8 x i16>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 4
+  return vec_first_match_index (vusa, vusb);
+}
+unsigned test7(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpequb(<16 x i8>
+// CHECK-BE: @llvm.ppc.altivec.vcmpequb(<16 x i8>
+// CHECK-BE: or <16 x i8>
+// CHECK-BE: @llvm.ppc.altivec.vcmpequb(<16 x i8>
+// CHECK-BE: or <16 x i8>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 3
+// CHECK: @llvm.ppc.altivec.vcmpequb(<16 x i8>
+// CHECK: @llvm.ppc.altivec.vcmpequb(<16 x i8>
+// CHECK: or <16 x i8>
+// CHECK: @llvm.ppc.altivec.vcmpequb(<16 x i8>
+// CHECK: or <16 x i8>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 3
+  return vec_first_match_or_eos_index (vsca, vscb);
+}
+unsigned test8(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpequb(<16 x i8>
+// CHECK-BE: @llvm.ppc.altivec.vcmpequb(<16 x i8>
+// CHECK-BE: or <16 x i8>
+// CHECK-BE: @llvm.ppc.altivec.vcmpequb(<16 x i8>
+// CHECK-BE: or <16 x i8>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 3
+// CHECK: @llvm.ppc.altivec.vcmpequb(<16 x i8>
+// CHECK: @llvm.ppc.altivec.vcmpequb(<16 x i8>
+// CHECK: or <16 x i8>
+// CHECK: @llvm.ppc.altivec.vcmpequb(<16 x i8>
+// CHECK: or <16 x i8>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 3
+  return vec_first_match_or_eos_index (vuca, vucb);
+}
+unsigned test9(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpequw(<4 x i32>
+// CHECK-BE: @llvm.ppc.altivec.vcmpequw(<4 x i32>
+// CHECK-BE: or <4 x i32>
+// CHECK-BE: @llvm.ppc.altivec.vcmpequw(<4 x i32>
+// CHECK-BE: or <4 x i32>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 5
+// CHECK: @llvm.ppc.altivec.vcmpequw(<4 x i32>
+// CHECK: @llvm.ppc.altivec.vcmpequw(<4 x i32>
+// CHECK: or <4 x i32>
+// CHECK: @llvm.ppc.altivec.vcmpequw(<4 x i32>
+// CHECK: or <4 x i32>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 5
+  return vec_first_match_or_eos_index (vsia, vsib);
+}
+unsigned test10(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpequw(<4 x i32>
+// CHECK-BE: @llvm.ppc.altivec.vcmpequw(<4 x i32>
+// CHECK-BE: or <4 x i32>
+// CHECK-BE: @llvm.ppc.altivec.vcmpequw(<4 x i32>
+// CHECK-BE: or <4 x i32>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 5
+// CHECK: @llvm.ppc.altivec.vcmpequw(<4 x i32>
+// CHECK: @llvm.ppc.altivec.vcmpequw(<4 x i32>
+// CHECK: or <4 x i32>
+// CHECK: @llvm.ppc.altivec.vcmpequw(<4 x i32>
+// CHECK: or <4 x i32>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 5
+  return vec_first_match_or_eos_index (vuia, vuib);
+}
+unsigned test11(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpequh(<8 x i16>
+// CHECK-BE: @llvm.ppc.altivec.vcmpequh(<8 x i16>
+// CHECK-BE: or <8 x i16>
+// CHECK-BE: @llvm.ppc.altivec.vcmpequh(<8 x i16>
+// CHECK-BE: or <8 x i16>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 4
+// CHECK: @llvm.ppc.altivec.vcmpequh(<8 x i16>
+// CHECK: @llvm.ppc.altivec.vcmpequh(<8 x i16>
+// CHECK: or <8 x i16>
+// CHECK: @llvm.ppc.altivec.vcmpequh(<8 x i16>
+// CHECK: or <8 x i16>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 4
+  return vec_first_match_or_eos_index (vssa, vssb);
+}
+unsigned test12(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpequh(<8 x i16>
+// CHECK-BE: @llvm.ppc.altivec.vcmpequh(<8 x i16>
+// CHECK-BE: or <8 x i16>
+// CHECK-BE: @llvm.ppc.altivec.vcmpequh(<8 x i16>
+// CHECK-BE: or <8 x i16>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 4
+// CHECK: @llvm.ppc.altivec.vcmpequh(<8 x i16>
+// CHECK: @llvm.ppc.altivec.vcmpequh(<8 x i16>
+// CHECK: or <8 x i16>
+// CHECK: @llvm.ppc.altivec.vcmpequh(<8 x i16>
+// CHECK: or <8 x i16>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 4
+  return vec_first_match_or_eos_index (vusa, vusb);
+}
+unsigned test13(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpneb(<16 x i8>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 3
+// CHECK: @llvm.ppc.altivec.vcmpneb(<16 x i8>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 3
+  return vec_first_mismatch_index (vsca, vscb);
+}
+unsigned test14(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpneb(<16 x i8>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 3
+// CHECK: @llvm.ppc.altivec.vcmpneb(<16 x i8>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 3
+  return vec_first_mismatch_index (vuca, vucb);
+}
+unsigned test15(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpnew(<4 x i32>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 5
+// CHECK: @llvm.ppc.altivec.vcmpnew(<4 x i32>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 5
+  return vec_first_mismatch_index (vsia, vsib);
+}
+unsigned test16(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpnew(<4 x i32>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 5
+// CHECK: @llvm.ppc.altivec.vcmpnew(<4 x i32>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 5
+  return vec_first_mismatch_index (vuia, vuib);
+}
+unsigned test17(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpneh(<8 x i16>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 4
+// CHECK: @llvm.ppc.altivec.vcmpneh(<8 x i16>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 4
+  return vec_first_mismatch_index (vssa, vssb);
+}
+unsigned test18(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpneh(<8 x i16>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 4
+// CHECK: @llvm.ppc.altivec.vcmpneh(<8 x i16>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 4
+  return vec_first_mismatch_index (vusa, vusb);
+}
+unsigned test19(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpnezb(<16 x i8>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 3
+// CHECK: @llvm.ppc.altivec.vcmpnezb(<16 x i8>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 3
+  return vec_first_mismatch_or_eos_index (vsca, vscb);
+}
+unsigned test20(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpnezb(<16 x i8>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 3
+// CHECK: @llvm.ppc.altivec.vcmpnezb(<16 x i8>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 3
+  return vec_first_mismatch_or_eos_index (vuca, vucb);
+}
+unsigned test21(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpnezw(<4 x i32>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 5
+// CHECK: @llvm.ppc.altivec.vcmpnezw(<4 x i32>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 5
+  return vec_first_mismatch_or_eos_index (vsia, vsib);
+}
+unsigned test22(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpnezw(<4 x i32>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 5
+// CHECK: @llvm.ppc.altivec.vcmpnezw(<4 x i32>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 5
+  return vec_first_mismatch_or_eos_index (vuia, vuib);
+}
+unsigned test23(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpnezh(<8 x i16>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 4
+// CHECK: @llvm.ppc.altivec.vcmpnezh(<8 x i16>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 4
+  return vec_first_mismatch_or_eos_index (vssa, vssb);
+}
+unsigned test24(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpnezh(<8 x i16>
+// CHECK-BE: @llvm.ctlz.v2i64(<2 x i64>
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: icmp eq i64 {{.*}}, 64
+// CHECK-BE: extractelement <2 x i64>
+// CHECK-BE: add i64 {{.*}}, 64
+// CHECK-BE: select i1
+// CHECK-BE: lshr i64 {{.*}}, 4
+// CHECK: @llvm.ppc.altivec.vcmpnezh(<8 x i16>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK: extractelement <2 x i64>
+// CHECK: icmp eq i64 {{.*}}, 64
+// CHECK: extractelement <2 x i64>
+// CHECK: add i64 {{.*}}, 64
+// CHECK: select i1
+// CHECK: lshr i64 {{.*}}, 4
+  return vec_first_mismatch_or_eos_index (vusa, vusb);
+}
+vector bool char test25(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpneb(<16 x i8>
+// CHECK-BE-NEXT: ret <16 x i8>
+// CHECK: @llvm.ppc.altivec.vcmpneb(<16 x i8>
+// CHECK-NEXT: ret <16 x i8>
+  return vec_cmpne (vbca, vbcb);
+}
+vector bool char test26(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpneb(<16 x i8>
+// CHECK-BE-NEXT: ret <16 x i8>
+// CHECK: @llvm.ppc.altivec.vcmpneb(<16 x i8>
+// CHECK-NEXT: ret <16 x i8>
+  return vec_cmpne (vsca, vscb);
+}
+vector bool char test27(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpneb(<16 x i8>
+// CHECK-BE-NEXT: ret <16 x i8>
+// CHECK: @llvm.ppc.altivec.vcmpneb(<16 x i8>
+// CHECK-NEXT: ret <16 x i8>
+  return vec_cmpne (vuca, vucb);
+}
+vector bool int test28(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpnew(<4 x i32>
+// CHECK-BE-NEXT: ret <4 x i32>
+// CHECK: @llvm.ppc.altivec.vcmpnew(<4 x i32>
+// CHECK-NEXT: ret <4 x i32>
+  return vec_cmpne (vbia, vbib);
+}
+vector bool int test29(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpnew(<4 x i32>
+// CHECK-BE-NEXT: ret <4 x i32>
+// CHECK: @llvm.ppc.altivec.vcmpnew(<4 x i32>
+// CHECK-NEXT: ret <4 x i32>
+  return vec_cmpne (vsia, vsib);
+}
+vector bool int test30(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpnew(<4 x i32>
+// CHECK-BE-NEXT: ret <4 x i32>
+// CHECK: @llvm.ppc.altivec.vcmpnew(<4 x i32>
+// CHECK-NEXT: ret <4 x i32>
+  return vec_cmpne (vuia, vuib);
+}
+vector bool long long test31(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpequd(<2 x i64>
+// CHECK-BE: xor <2 x i64>
+// CHECK-BE-NEXT: ret <2 x i64>
+// CHECK: @llvm.ppc.altivec.vcmpequd(<2 x i64>
+// CHECK: xor <2 x i64>
+// CHECK-NEXT: ret <2 x i64>
+  return vec_cmpne (vbla, vblb);
+}
+vector bool long long test32(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpequd(<2 x i64>
+// CHECK-BE: xor <2 x i64>
+// CHECK-BE-NEXT: ret <2 x i64>
+// CHECK: @llvm.ppc.altivec.vcmpequd(<2 x i64>
+// CHECK: xor <2 x i64>
+// CHECK-NEXT: ret <2 x i64>
+  return vec_cmpne (vsla, vslb);
+}
+vector bool long long test33(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpequd(<2 x i64>
+// CHECK-BE: xor <2 x i64>
+// CHECK-BE-NEXT: ret <2 x i64>
+// CHECK: @llvm.ppc.altivec.vcmpequd(<2 x i64>
+// CHECK: xor <2 x i64>
+// CHECK-NEXT: ret <2 x i64>
+  return vec_cmpne (vula, vulb);
+}
+vector bool short test34(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpneh(<8 x i16>
+// CHECK-BE-NEXT: ret <8 x i16>
+// CHECK: @llvm.ppc.altivec.vcmpneh(<8 x i16>
+// CHECK-NEXT: ret <8 x i16>
+  return vec_cmpne (vbsa, vbsb);
+}
+vector bool short test35(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpneh(<8 x i16>
+// CHECK-BE-NEXT: ret <8 x i16>
+// CHECK: @llvm.ppc.altivec.vcmpneh(<8 x i16>
+// CHECK-NEXT: ret <8 x i16>
+  return vec_cmpne (vssa, vssb);
+}
+vector bool short test36(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpneh(<8 x i16>
+// CHECK-BE-NEXT: ret <8 x i16>
+// CHECK: @llvm.ppc.altivec.vcmpneh(<8 x i16>
+// CHECK-NEXT: ret <8 x i16>
+  return vec_cmpne (vusa, vusb);
+}
+vector bool long long test37(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpequd(<2 x i64>
+// CHECK-BE: xor <2 x i64>
+// CHECK-BE-NEXT: ret <2 x i64>
+// CHECK: @llvm.ppc.altivec.vcmpequd(<2 x i64>
+// CHECK: xor <2 x i64>
+// CHECK-NEXT: ret <2 x i64>
+  return vec_cmpne (vda, vdb);
+}
+vector bool int test38(void) {
+// CHECK-BE: @llvm.ppc.altivec.vcmpnew(<4 x i32>
+// CHECK-BE-NEXT: ret <4 x i32>
+// CHECK: @llvm.ppc.altivec.vcmpnew(<4 x i32>
+// CHECK-NEXT: ret <4 x i32>
+  return vec_cmpne (vfa, vfb);
+}
+vector signed char test39(void) {
+// CHECK-BE: @llvm.cttz.v16i8(<16 x i8>
+// CHECK-BE-NEXT: ret <16 x i8>
+// CHECK: @llvm.cttz.v16i8(<16 x i8>
+// CHECK-NEXT: ret <16 x i8>
+  return vec_cnttz (vsca);
+}
+vector unsigned char test40(void) {
+// CHECK-BE: @llvm.cttz.v16i8(<16 x i8>
+// CHECK-BE-NEXT: ret <16 x i8>
+// CHECK: @llvm.cttz.v16i8(<16 x i8>
+// CHECK-NEXT: ret <16 x i8>
+  return vec_cnttz (vuca);
+}
+vector signed int test41(void) {
+// CHECK-BE: @llvm.cttz.v4i32(<4 x i32>
+// CHECK-BE-NEXT: ret <4 x i32>
+// CHECK: @llvm.cttz.v4i32(<4 x i32>
+// CHECK-NEXT: ret <4 x i32>
+  return vec_cnttz (vsia);
+}
+vector unsigned int test42(void) {
+// CHECK-BE: @llvm.cttz.v4i32(<4 x i32>
+// CHECK-BE-NEXT: ret <4 x i32>
+// CHECK: @llvm.cttz.v4i32(<4 x i32>
+// CHECK-NEXT: ret <4 x i32>
+  return vec_cnttz (vuia);
+}
+vector signed long long test43(void) {
+// CHECK-BE: @llvm.cttz.v2i64(<2 x i64>
+// CHECK-BE-NEXT: ret <2 x i64>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK-NEXT: ret <2 x i64>
+  return vec_cnttz (vsla);
+}
+vector unsigned long long test44(void) {
+// CHECK-BE: @llvm.cttz.v2i64(<2 x i64>
+// CHECK-BE-NEXT: ret <2 x i64>
+// CHECK: @llvm.cttz.v2i64(<2 x i64>
+// CHECK-NEXT: ret <2 x i64>
+  return vec_cnttz (vula);
+}
+vector signed short test45(void) {
+// CHECK-BE: @llvm.cttz.v8i16(<8 x i16>
+// CHECK-BE-NEXT: ret <8 x i16>
+// CHECK: @llvm.cttz.v8i16(<8 x i16>
+// CHECK-NEXT: ret <8 x i16>
+  return vec_cnttz (vssa);
+}
+vector unsigned short test46(void) {
+// CHECK-BE: @llvm.cttz.v8i16(<8 x i16>
+// CHECK-BE-NEXT: ret <8 x i16>
+// CHECK: @llvm.cttz.v8i16(<8 x i16>
+// CHECK-NEXT: ret <8 x i16>
+  return vec_cnttz (vusa);
+}
+vector unsigned char test47(void) {
+// CHECK-BE: @llvm.ctpop.v16i8(<16 x i8>
+// CHECK-BE-NEXT: ret <16 x i8>
+// CHECK: @llvm.ctpop.v16i8(<16 x i8>
+// CHECK-NEXT: ret <16 x i8>
+  return vec_popcnt (vsca);
+}
+vector unsigned char test48(void) {
+// CHECK-BE: @llvm.ctpop.v16i8(<16 x i8>
+// CHECK-BE-NEXT: ret <16 x i8>
+// CHECK: @llvm.ctpop.v16i8(<16 x i8>
+// CHECK-NEXT: ret <16 x i8>
+  return vec_popcnt (vuca);
+}
+vector unsigned int test49(void) {
+// CHECK-BE: @llvm.ctpop.v4i32(<4 x i32>
+// CHECK-BE-NEXT: ret <4 x i32>
+// CHECK: @llvm.ctpop.v4i32(<4 x i32>
+// CHECK-NEXT: ret <4 x i32>
+  return vec_popcnt (vsia);
+}
+vector unsigned int test50(void) {
+// CHECK-BE: @llvm.ctpop.v4i32(<4 x i32>
+// CHECK-BE-NEXT: ret <4 x i32>
+// CHECK: @llvm.ctpop.v4i32(<4 x i32>
+// CHECK-NEXT: ret <4 x i32>
+  return vec_popcnt (vuia);
+}
+vector unsigned long long test51(void) {
+// CHECK-BE: @llvm.ctpop.v2i64(<2 x i64>
+// CHECK-BE-NEXT: ret <2 x i64>
+// CHECK: @llvm.ctpop.v2i64(<2 x i64>
+// CHECK-NEXT: ret <2 x i64>
+  return vec_popcnt (vsla);
+}
+vector unsigned long long test52(void) {
+// CHECK-BE: @llvm.ctpop.v2i64(<2 x i64>
+// CHECK-BE-NEXT: ret <2 x i64>
+// CHECK: @llvm.ctpop.v2i64(<2 x i64>
+// CHECK-NEXT: ret <2 x i64>
+  return vec_popcnt (vula);
+}
+vector unsigned short test53(void) {
+// CHECK-BE: @llvm.ctpop.v8i16(<8 x i16>
+// CHECK-BE-NEXT: ret <8 x i16>
+// CHECK: @llvm.ctpop.v8i16(<8 x i16>
+// CHECK-NEXT: ret <8 x i16>
+  return vec_popcnt (vssa);
+}
+vector unsigned short test54(void) {
+// CHECK-BE: @llvm.ctpop.v8i16(<8 x i16>
+// CHECK-BE-NEXT: ret <8 x i16>
+// CHECK: @llvm.ctpop.v8i16(<8 x i16>
+// CHECK-NEXT: ret <8 x i16>
+  return vec_popcnt (vusa);
+}