diff --git a/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h b/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h --- a/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h @@ -17,7 +17,7 @@ /// Enums used to templatize vector intrinsic function generators. Enum does /// not contain every vector intrinsic, only intrinsics that share generators. -enum class VecOp { Add, And, Mul, Sub, Xor }; +enum class VecOp { Add, And, Anyge, Cmpge, Cmpgt, Cmple, Cmplt, Mul, Sub, Xor }; // Wrapper struct to encapsulate information for a vector type. Preserves // sign of eleTy if eleTy is signed/unsigned integer. Helps with vector type @@ -88,6 +88,14 @@ fir::ExtendedValue genVecAddAndMulSubXor(mlir::Type resultType, llvm::ArrayRef args); + + template + fir::ExtendedValue genVecCmp(mlir::Type resultType, + llvm::ArrayRef args); + + template + fir::ExtendedValue genVecAnyCompare(mlir::Type resultType, + llvm::ArrayRef args); }; const IntrinsicHandler *findPPCIntrinsicHandler(llvm::StringRef name); diff --git a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp --- a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp @@ -43,6 +43,31 @@ &PI::genVecAddAndMulSubXor), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, + {"__ppc_vec_any_ge", + static_cast( + &PI::genVecAnyCompare), + {{{"arg1", asValue}, {"arg2", asValue}}}, + /*isElemental=*/true}, + {"__ppc_vec_cmpge", + static_cast( + &PI::genVecCmp), + {{{"arg1", asValue}, {"arg2", asValue}}}, + /*isElemental=*/true}, + {"__ppc_vec_cmpgt", + static_cast( + &PI::genVecCmp), + {{{"arg1", asValue}, {"arg2", asValue}}}, + /*isElemental=*/true}, + {"__ppc_vec_cmple", + static_cast( + &PI::genVecCmp), + {{{"arg1", asValue}, {"arg2", asValue}}}, + /*isElemental=*/true}, + {"__ppc_vec_cmplt", + static_cast( + &PI::genVecCmp), + {{{"arg1", asValue}, {"arg2", asValue}}}, + /*isElemental=*/true}, {"__ppc_vec_mul", static_cast( &PI::genVecAddAndMulSubXor), @@ -324,4 +349,296 @@ return builder.createConvert(loc, argsTy[0], r); } +// VEC_ANY_GE +template +fir::ExtendedValue +PPCIntrinsicLibrary::genVecAnyCompare(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 2); + assert(vop == VecOp::Anyge && "unknown vector compare operation"); + auto argBases{getBasesForArgs(args)}; + VecTypeInfo vTypeInfo{getVecTypeFromFir(argBases[0])}; + const auto isSupportedTy{ + mlir::isa( + vTypeInfo.eleTy)}; + assert(isSupportedTy && "unsupported vector type"); + + // Constants for mapping CR6 bits to predicate result + enum { CR6_EQ_REV = 1, CR6_LT_REV = 3 }; + + auto context{builder.getContext()}; + + static std::map, + std::pair> + uiBuiltin{ + {std::make_pair(ParamTypeId::IntegerVector, 8), + std::make_pair( + "llvm.ppc.altivec.vcmpgtsb.p", + genFuncType, Ty::Integer<4>, Ty::IntegerVector<1>, + Ty::IntegerVector<1>>(context, builder))}, + {std::make_pair(ParamTypeId::IntegerVector, 16), + std::make_pair( + "llvm.ppc.altivec.vcmpgtsh.p", + genFuncType, Ty::Integer<4>, Ty::IntegerVector<2>, + Ty::IntegerVector<2>>(context, builder))}, + {std::make_pair(ParamTypeId::IntegerVector, 32), + std::make_pair( + "llvm.ppc.altivec.vcmpgtsw.p", + genFuncType, Ty::Integer<4>, Ty::IntegerVector<4>, + Ty::IntegerVector<4>>(context, builder))}, + {std::make_pair(ParamTypeId::IntegerVector, 64), + std::make_pair( + "llvm.ppc.altivec.vcmpgtsd.p", + genFuncType, Ty::Integer<4>, 
Ty::IntegerVector<8>, + Ty::IntegerVector<8>>(context, builder))}, + {std::make_pair(ParamTypeId::UnsignedVector, 8), + std::make_pair( + "llvm.ppc.altivec.vcmpgtub.p", + genFuncType, Ty::Integer<4>, + Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>( + context, builder))}, + {std::make_pair(ParamTypeId::UnsignedVector, 16), + std::make_pair( + "llvm.ppc.altivec.vcmpgtuh.p", + genFuncType, Ty::Integer<4>, + Ty::UnsignedVector<2>, Ty::UnsignedVector<2>>( + context, builder))}, + {std::make_pair(ParamTypeId::UnsignedVector, 32), + std::make_pair( + "llvm.ppc.altivec.vcmpgtuw.p", + genFuncType, Ty::Integer<4>, + Ty::UnsignedVector<4>, Ty::UnsignedVector<4>>( + context, builder))}, + {std::make_pair(ParamTypeId::UnsignedVector, 64), + std::make_pair( + "llvm.ppc.altivec.vcmpgtud.p", + genFuncType, Ty::Integer<4>, + Ty::UnsignedVector<8>, Ty::UnsignedVector<8>>( + context, builder))}, + }; + + mlir::FunctionType ftype{nullptr}; + llvm::StringRef fname; + const auto i32Ty{mlir::IntegerType::get(context, 32)}; + llvm::SmallVector cmpArgs; + mlir::Value op{nullptr}; + const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()}; + + if (auto elementTy = mlir::dyn_cast(vTypeInfo.eleTy)) { + std::pair bi; + bi = (elementTy.isUnsignedInteger()) + ? uiBuiltin[std::pair(ParamTypeId::UnsignedVector, width)] + : uiBuiltin[std::pair(ParamTypeId::IntegerVector, width)]; + + fname = std::get<0>(bi); + ftype = std::get<1>(bi); + + op = builder.createIntegerConstant(loc, i32Ty, CR6_LT_REV); + cmpArgs.emplace_back(op); + // reverse the argument order + cmpArgs.emplace_back(argBases[1]); + cmpArgs.emplace_back(argBases[0]); + } else if (vTypeInfo.isFloat()) { + if (vTypeInfo.isFloat32()) { + fname = "llvm.ppc.vsx.xvcmpgesp.p"; + ftype = genFuncType, Ty::Integer<4>, Ty::RealVector<4>, + Ty::RealVector<4>>(context, builder); + } else { + fname = "llvm.ppc.vsx.xvcmpgedp.p"; + ftype = genFuncType, Ty::Integer<4>, Ty::RealVector<8>, + Ty::RealVector<8>>(context, builder); + } + op = builder.createIntegerConstant(loc, i32Ty, CR6_EQ_REV); + cmpArgs.emplace_back(op); + cmpArgs.emplace_back(argBases[0]); + cmpArgs.emplace_back(argBases[1]); + } + assert((!fname.empty() && ftype) && "invalid type"); + + mlir::func::FuncOp funcOp{builder.addNamedFunction(loc, fname, ftype)}; + auto callOp{builder.create(loc, funcOp, cmpArgs)}; + return callOp.getResult(0); +} + +static std::pair +getVecCmpFuncTypeAndName(VecTypeInfo &vTypeInfo, VecOp vop, + fir::FirOpBuilder &builder) { + auto context{builder.getContext()}; + static std::map, + std::pair> + iuBuiltinName{ + {std::make_pair(ParamTypeId::IntegerVector, 8), + std::make_pair( + "llvm.ppc.altivec.vcmpgtsb", + genFuncType, Ty::IntegerVector<1>, + Ty::IntegerVector<1>>(context, builder))}, + {std::make_pair(ParamTypeId::IntegerVector, 16), + std::make_pair( + "llvm.ppc.altivec.vcmpgtsh", + genFuncType, Ty::IntegerVector<2>, + Ty::IntegerVector<2>>(context, builder))}, + {std::make_pair(ParamTypeId::IntegerVector, 32), + std::make_pair( + "llvm.ppc.altivec.vcmpgtsw", + genFuncType, Ty::IntegerVector<4>, + Ty::IntegerVector<4>>(context, builder))}, + {std::make_pair(ParamTypeId::IntegerVector, 64), + std::make_pair( + "llvm.ppc.altivec.vcmpgtsd", + genFuncType, Ty::IntegerVector<8>, + Ty::IntegerVector<8>>(context, builder))}, + {std::make_pair(ParamTypeId::UnsignedVector, 8), + std::make_pair( + "llvm.ppc.altivec.vcmpgtub", + genFuncType, Ty::UnsignedVector<1>, + Ty::UnsignedVector<1>>(context, builder))}, + {std::make_pair(ParamTypeId::UnsignedVector, 16), + std::make_pair( + 
"llvm.ppc.altivec.vcmpgtuh", + genFuncType, Ty::UnsignedVector<2>, + Ty::UnsignedVector<2>>(context, builder))}, + {std::make_pair(ParamTypeId::UnsignedVector, 32), + std::make_pair( + "llvm.ppc.altivec.vcmpgtuw", + genFuncType, Ty::UnsignedVector<4>, + Ty::UnsignedVector<4>>(context, builder))}, + {std::make_pair(ParamTypeId::UnsignedVector, 64), + std::make_pair( + "llvm.ppc.altivec.vcmpgtud", + genFuncType, Ty::UnsignedVector<8>, + Ty::UnsignedVector<8>>(context, builder))}}; + + // VSX only defines GE and GT builtins. Cmple and Cmplt use GE and GT with + // arguments revsered. + enum class Cmp { gtOrLt, geOrLe }; + static std::map, + std::pair> + rGBI{{std::make_pair(Cmp::geOrLe, 32), + std::make_pair("llvm.ppc.vsx.xvcmpgesp", + genFuncType, Ty::RealVector<4>, + Ty::RealVector<4>>(context, builder))}, + {std::make_pair(Cmp::geOrLe, 64), + std::make_pair("llvm.ppc.vsx.xvcmpgedp", + genFuncType, Ty::RealVector<8>, + Ty::RealVector<8>>(context, builder))}, + {std::make_pair(Cmp::gtOrLt, 32), + std::make_pair("llvm.ppc.vsx.xvcmpgtsp", + genFuncType, Ty::RealVector<4>, + Ty::RealVector<4>>(context, builder))}, + {std::make_pair(Cmp::gtOrLt, 64), + std::make_pair("llvm.ppc.vsx.xvcmpgtdp", + genFuncType, Ty::RealVector<8>, + Ty::RealVector<8>>(context, builder))}}; + + const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()}; + std::pair specFunc; + if (auto elementTy = mlir::dyn_cast(vTypeInfo.eleTy)) + specFunc = + (elementTy.isUnsignedInteger()) + ? iuBuiltinName[std::make_pair(ParamTypeId::UnsignedVector, width)] + : iuBuiltinName[std::make_pair(ParamTypeId::IntegerVector, width)]; + else if (vTypeInfo.isFloat()) + specFunc = (vop == VecOp::Cmpge || vop == VecOp::Cmple) + ? rGBI[std::make_pair(Cmp::geOrLe, width)] + : rGBI[std::make_pair(Cmp::gtOrLt, width)]; + + assert(!std::get<0>(specFunc).empty() && "unknown builtin name"); + assert(std::get<1>(specFunc) && "unknown function type"); + return specFunc; +} + +// VEC_CMPGE, VEC_CMPGT, VEC_CMPLE, VEC_CMPLT +template +fir::ExtendedValue +PPCIntrinsicLibrary::genVecCmp(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 2); + auto context{builder.getContext()}; + auto argBases{getBasesForArgs(args)}; + VecTypeInfo vecTyInfo{getVecTypeFromFir(argBases[0])}; + auto varg{convertVecArgs(builder, loc, vecTyInfo, argBases)}; + + std::pair funcTyNam{ + getVecCmpFuncTypeAndName(vecTyInfo, vop, builder)}; + + mlir::func::FuncOp funcOp = builder.addNamedFunction( + loc, std::get<0>(funcTyNam), std::get<1>(funcTyNam)); + + mlir::Value res{nullptr}; + + if (auto eTy = vecTyInfo.eleTy.dyn_cast()) { + constexpr int firstArg{0}; + constexpr int secondArg{1}; + std::map> argOrder{ + {VecOp::Cmpge, {secondArg, firstArg}}, + {VecOp::Cmple, {firstArg, secondArg}}, + {VecOp::Cmpgt, {firstArg, secondArg}}, + {VecOp::Cmplt, {secondArg, firstArg}}}; + + // Construct the function return type, unsigned vector, for conversion. 
+ auto itype = mlir::IntegerType::get(context, eTy.getWidth(), + mlir::IntegerType::Unsigned); + auto returnType = fir::VectorType::get(vecTyInfo.len, itype); + + switch (vop) { + case VecOp::Cmpgt: + case VecOp::Cmplt: { + // arg1 > arg2 --> vcmpgt(arg1, arg2) + // arg1 < arg2 --> vcmpgt(arg2, arg1) + mlir::Value vargs[]{argBases[argOrder[vop][0]], + argBases[argOrder[vop][1]]}; + auto callOp{builder.create(loc, funcOp, vargs)}; + res = callOp.getResult(0); + break; + } + case VecOp::Cmpge: + case VecOp::Cmple: { + // arg1 >= arg2 --> vcmpge(arg2, arg1) xor vector(-1) + // arg1 <= arg2 --> vcmpge(arg1, arg2) xor vector(-1) + mlir::Value vargs[]{argBases[argOrder[vop][0]], + argBases[argOrder[vop][1]]}; + + // Construct a constant vector(-1) + auto negOneVal{builder.createIntegerConstant( + loc, getConvertedElementType(context, eTy), -1)}; + auto vNegOne{builder.create( + loc, vecTyInfo.toMlirVectorType(context), negOneVal)}; + + auto callOp{builder.create(loc, funcOp, vargs)}; + mlir::Value callRes{callOp.getResult(0)}; + auto vargs2{ + convertVecArgs(builder, loc, vecTyInfo, mlir::ValueRange{callRes})}; + auto xorRes{builder.create(loc, vargs2[0], vNegOne)}; + + res = builder.createConvert(loc, returnType, xorRes); + break; + } + default: + assert("Invalid vector operation for generator"); + } + } else if (vecTyInfo.isFloat()) { + mlir::Value vargs[2]; + switch (vop) { + case VecOp::Cmpge: + case VecOp::Cmpgt: + vargs[0] = argBases[0]; + vargs[1] = argBases[1]; + break; + case VecOp::Cmple: + case VecOp::Cmplt: + // Swap the arguments as xvcmpg[et] is used + vargs[0] = argBases[1]; + vargs[1] = argBases[0]; + break; + default: + assert("Invalid vector operation for generator"); + } + auto callOp{builder.create(loc, funcOp, vargs)}; + res = callOp.getResult(0); + } else + assert("invalid vector type"); + + return res; +} + } // namespace fir diff --git a/flang/module/__ppc_intrinsics.f90 b/flang/module/__ppc_intrinsics.f90 --- a/flang/module/__ppc_intrinsics.f90 +++ b/flang/module/__ppc_intrinsics.f90 @@ -29,6 +29,12 @@ vector(integer(VKIND)), intent(in) :: arg1, arg2; \ end function ; +! vector(u) function f(vector(i), vector(i)) +#define ELEM_FUNC_VUVIVI(VKIND) \ + elemental vector(unsigned(VKIND)) function elem_func_vu##VKIND##vi##VKIND##vi##VKIND(arg1, arg2); \ + vector(integer(VKIND)), intent(in) :: arg1, arg2; \ + end function ; + ! vector(u) function f(vector(u), vector(u)) #define ELEM_FUNC_VUVUVU(VKIND) \ elemental vector(unsigned(VKIND)) function elem_func_vu##VKIND##vu##VKIND##vu##VKIND(arg1, arg2); \ @@ -41,12 +47,46 @@ vector(real(VKIND)), intent(in) :: arg1, arg2; \ end function ; +! vector(u) function f(vector(r), vector(r)) +#define ELEM_FUNC_VUVRVR(VKIND) \ + elemental vector(unsigned(VKIND)) function elem_func_vu##VKIND##vr##VKIND##vr##VKIND(arg1, arg2); \ + vector(real(VKIND)), intent(in) :: arg1, arg2; \ + end function ; + +! integer function f(vector(i), vector(i)) +#define ELEM_FUNC_IVIVI(RKIND, VKIND) \ + elemental integer(RKIND) function elem_func_i##RKIND##vi##VKIND##vi##VKIND(arg1, arg2); \ + vector(integer(VKIND)), intent(in) :: arg1, arg2; \ + end function ; + +! integer function f(vector(u), vector(u)) +#define ELEM_FUNC_IVUVU(RKIND, VKIND) \ + elemental integer(RKIND) function elem_func_i##RKIND##vu##VKIND##vu##VKIND(arg1, arg2); \ + vector(unsigned(VKIND)), intent(in) :: arg1, arg2; \ + end function ; + +! 
integer function f(vector(r), vector(r)) +#define ELEM_FUNC_IVRVR(RKIND, VKIND) \ + elemental integer(RKIND) function elem_func_i##RKIND##vr##VKIND##vr##VKIND(arg1, arg2); \ + vector(real(VKIND)), intent(in) :: arg1, arg2; \ + end function ; + ELEM_FUNC_VIVIVI(1) ELEM_FUNC_VIVIVI(2) ELEM_FUNC_VIVIVI(4) ELEM_FUNC_VIVIVI(8) + ELEM_FUNC_VUVIVI(1) ELEM_FUNC_VUVIVI(2) ELEM_FUNC_VUVIVI(4) ELEM_FUNC_VUVIVI(8) ELEM_FUNC_VUVUVU(1) ELEM_FUNC_VUVUVU(2) ELEM_FUNC_VUVUVU(4) ELEM_FUNC_VUVUVU(8) ELEM_FUNC_VRVRVR(4) ELEM_FUNC_VRVRVR(8) - + ELEM_FUNC_VUVRVR(4) ELEM_FUNC_VUVRVR(8) + ELEM_FUNC_IVIVI(4,1) ELEM_FUNC_IVIVI(4,2) ELEM_FUNC_IVIVI(4,4) ELEM_FUNC_IVIVI(4,8) + ELEM_FUNC_IVUVU(4,1) ELEM_FUNC_IVUVU(4,2) ELEM_FUNC_IVUVU(4,4) ELEM_FUNC_IVUVU(4,8) + ELEM_FUNC_IVRVR(4,4) ELEM_FUNC_IVRVR(4,8) + +#undef ELEM_FUNC_IVIVI +#undef ELEM_FUNC_IVUVU +#undef ELEM_FUNC_IVRVR +#undef ELEM_FUNC_VUVRVR #undef ELEM_FUNC_VRVRVR #undef ELEM_FUNC_VUVUVU +#undef ELEM_FUNC_VUVIVI #undef ELEM_FUNC_VIVIVI !! ================ 3 arguments function interface ================ @@ -221,15 +261,21 @@ ! vector function(vector, vector) !--------------------------------- #define VI_VI_VI(NAME, VKIND) __ppc_##NAME##_vi##VKIND##vi##VKIND##vi##VKIND +#define VU_VI_VI(NAME, VKIND) __ppc_##NAME##_vu##VKIND##vi##VKIND##vi##VKIND #define VU_VU_VU(NAME, VKIND) __ppc_##NAME##_vu##VKIND##vu##VKIND##vu##VKIND #define VR_VR_VR(NAME, VKIND) __ppc_##NAME##_vr##VKIND##vr##VKIND##vr##VKIND +#define VU_VR_VR(NAME, VKIND) __ppc_##NAME##_vu##VKIND##vr##VKIND##vr##VKIND #define VEC_VI_VI_VI(NAME, VKIND) \ procedure(elem_func_vi##VKIND##vi##VKIND##vi##VKIND) :: VI_VI_VI(NAME, VKIND); +#define VEC_VU_VI_VI(NAME, VKIND) \ + procedure(elem_func_vu##VKIND##vi##VKIND##vi##VKIND) :: VU_VI_VI(NAME, VKIND); #define VEC_VU_VU_VU(NAME, VKIND) \ procedure(elem_func_vu##VKIND##vu##VKIND##vu##VKIND) :: VU_VU_VU(NAME, VKIND); #define VEC_VR_VR_VR(NAME, VKIND) \ procedure(elem_func_vr##VKIND##vr##VKIND##vr##VKIND) :: VR_VR_VR(NAME, VKIND); +#define VEC_VU_VR_VR(NAME, VKIND) \ + procedure(elem_func_vu##VKIND##vr##VKIND##vr##VKIND) :: VU_VR_VR(NAME, VKIND); ! vec_add VEC_VI_VI_VI(vec_add,1) VEC_VI_VI_VI(vec_add,2) VEC_VI_VI_VI(vec_add,4) VEC_VI_VI_VI(vec_add,8) @@ -253,6 +299,50 @@ end interface vec_and public :: vec_and +! vec_cmpge + VEC_VU_VI_VI(vec_cmpge,1) VEC_VU_VI_VI(vec_cmpge,2) VEC_VU_VI_VI(vec_cmpge,4) VEC_VU_VI_VI(vec_cmpge,8) + VEC_VU_VU_VU(vec_cmpge,1) VEC_VU_VU_VU(vec_cmpge,2) VEC_VU_VU_VU(vec_cmpge,4) VEC_VU_VU_VU(vec_cmpge,8) + VEC_VU_VR_VR(vec_cmpge,4) VEC_VU_VR_VR(vec_cmpge,8) + interface vec_cmpge + procedure :: VU_VI_VI(vec_cmpge,1), VU_VI_VI(vec_cmpge,2), VU_VI_VI(vec_cmpge,4), VU_VI_VI(vec_cmpge,8) + procedure :: VU_VU_VU(vec_cmpge,1), VU_VU_VU(vec_cmpge,2), VU_VU_VU(vec_cmpge,4), VU_VU_VU(vec_cmpge,8) + procedure :: VU_VR_VR(vec_cmpge,4), VU_VR_VR(vec_cmpge,8) + end interface vec_cmpge + public :: vec_cmpge + +! vec_cmpgt + VEC_VU_VI_VI(vec_cmpgt,1) VEC_VU_VI_VI(vec_cmpgt,2) VEC_VU_VI_VI(vec_cmpgt,4) VEC_VU_VI_VI(vec_cmpgt,8) + VEC_VU_VU_VU(vec_cmpgt,1) VEC_VU_VU_VU(vec_cmpgt,2) VEC_VU_VU_VU(vec_cmpgt,4) VEC_VU_VU_VU(vec_cmpgt,8) + VEC_VU_VR_VR(vec_cmpgt,4) VEC_VU_VR_VR(vec_cmpgt,8) + interface vec_cmpgt + procedure :: VU_VI_VI(vec_cmpgt,1), VU_VI_VI(vec_cmpgt,2), VU_VI_VI(vec_cmpgt,4), VU_VI_VI(vec_cmpgt,8) + procedure :: VU_VU_VU(vec_cmpgt,1), VU_VU_VU(vec_cmpgt,2), VU_VU_VU(vec_cmpgt,4), VU_VU_VU(vec_cmpgt,8) + procedure :: VU_VR_VR(vec_cmpgt,4), VU_VR_VR(vec_cmpgt,8) + end interface vec_cmpgt + public :: vec_cmpgt + +! 
vec_cmple + VEC_VU_VI_VI(vec_cmple,1) VEC_VU_VI_VI(vec_cmple,2) VEC_VU_VI_VI(vec_cmple,4) VEC_VU_VI_VI(vec_cmple,8) + VEC_VU_VU_VU(vec_cmple,1) VEC_VU_VU_VU(vec_cmple,2) VEC_VU_VU_VU(vec_cmple,4) VEC_VU_VU_VU(vec_cmple,8) + VEC_VU_VR_VR(vec_cmple,4) VEC_VU_VR_VR(vec_cmple,8) + interface vec_cmple + procedure :: VU_VI_VI(vec_cmple,1), VU_VI_VI(vec_cmple,2), VU_VI_VI(vec_cmple,4), VU_VI_VI(vec_cmple,8) + procedure :: VU_VU_VU(vec_cmple,1), VU_VU_VU(vec_cmple,2), VU_VU_VU(vec_cmple,4), VU_VU_VU(vec_cmple,8) + procedure :: VU_VR_VR(vec_cmple,4), VU_VR_VR(vec_cmple,8) + end interface vec_cmple + public :: vec_cmple + +! vec_cmplt + VEC_VU_VI_VI(vec_cmplt,1) VEC_VU_VI_VI(vec_cmplt,2) VEC_VU_VI_VI(vec_cmplt,4) VEC_VU_VI_VI(vec_cmplt,8) + VEC_VU_VU_VU(vec_cmplt,1) VEC_VU_VU_VU(vec_cmplt,2) VEC_VU_VU_VU(vec_cmplt,4) VEC_VU_VU_VU(vec_cmplt,8) + VEC_VU_VR_VR(vec_cmplt,4) VEC_VU_VR_VR(vec_cmplt,8) + interface vec_cmplt + procedure :: VU_VI_VI(vec_cmplt,1), VU_VI_VI(vec_cmplt,2), VU_VI_VI(vec_cmplt,4), VU_VI_VI(vec_cmplt,8) + procedure :: VU_VU_VU(vec_cmplt,1), VU_VU_VU(vec_cmplt,2), VU_VU_VU(vec_cmplt,4), VU_VU_VU(vec_cmplt,8) + procedure :: VU_VR_VR(vec_cmplt,4), VU_VR_VR(vec_cmplt,8) + end interface vec_cmplt + public :: vec_cmplt + ! vec_max VEC_VI_VI_VI(vec_max,1) VEC_VI_VI_VI(vec_max,2) VEC_VI_VI_VI(vec_max,4) VEC_VI_VI_VI(vec_max,8) VEC_VU_VU_VU(vec_max,1) VEC_VU_VU_VU(vec_max,2) VEC_VU_VU_VU(vec_max,4) VEC_VU_VU_VU(vec_max,8) @@ -308,11 +398,15 @@ end interface vec_xor public :: vec_xor +#undef VEC_VU_VR_VR #undef VEC_VR_VR_VR #undef VEC_VU_VU_VU #undef VEC_VI_VI_VI +#undef VEC_VU_VI_VI +#undef VU_VR_VR #undef VR_VR_VR #undef VU_VU_VU +#undef VU_VI_VI #undef VI_VI_VI !----------------------------------------- @@ -340,4 +434,36 @@ #undef VEC_VR_VR_VR_VR #undef VR_VR_VR_VR +!---------------------------------- +! integer function(vector, vector) +!---------------------------------- +#define I_VI_VI(NAME, RKIND, VKIND) __ppc_##NAME##_i##RKIND##vi##VKIND##vi##VKIND +#define I_VU_VU(NAME, RKIND, VKIND) __ppc_##NAME##_i##RKIND##vu##VKIND##vu##VKIND +#define I_VR_VR(NAME, RKIND, VKIND) __ppc_##NAME##_i##RKIND##vr##VKIND##vr##VKIND + +#define VEC_I_VI_VI(NAME, RKIND, VKIND) \ + procedure(elem_func_i##RKIND##vi##VKIND##vi##VKIND) :: I_VI_VI(NAME, RKIND, VKIND); +#define VEC_I_VU_VU(NAME, RKIND, VKIND) \ + procedure(elem_func_i##RKIND##vu##VKIND##vu##VKIND) :: I_VU_VU(NAME, RKIND, VKIND); +#define VEC_I_VR_VR(NAME, RKIND, VKIND) \ + procedure(elem_func_i##RKIND##vr##VKIND##vr##VKIND) :: I_VR_VR(NAME, RKIND, VKIND); + +! vec_any_ge + VEC_I_VI_VI(vec_any_ge,4,1) VEC_I_VI_VI(vec_any_ge,4,2) VEC_I_VI_VI(vec_any_ge,4,4) VEC_I_VI_VI(vec_any_ge,4,8) + VEC_I_VU_VU(vec_any_ge,4,1) VEC_I_VU_VU(vec_any_ge,4,2) VEC_I_VU_VU(vec_any_ge,4,4) VEC_I_VU_VU(vec_any_ge,4,8) + VEC_I_VR_VR(vec_any_ge,4,4) VEC_I_VR_VR(vec_any_ge,4,8) + interface vec_any_ge + procedure :: I_VI_VI(vec_any_ge,4,1), I_VI_VI(vec_any_ge,4,2), I_VI_VI(vec_any_ge,4,4), I_VI_VI(vec_any_ge,4,8) + procedure :: I_VU_VU(vec_any_ge,4,1), I_VU_VU(vec_any_ge,4,2), I_VU_VU(vec_any_ge,4,4), I_VU_VU(vec_any_ge,4,8) + procedure :: I_VR_VR(vec_any_ge,4,4), I_VR_VR(vec_any_ge,4,8) + end interface vec_any_ge + public :: vec_any_ge + +#undef VEC_I_VR_VR +#undef VEC_I_VU_VU +#undef VEC_I_VI_VI +#undef I_VR_VR +#undef I_VU_VU +#undef I_VI_VI + end module __ppc_intrinsics diff --git a/flang/test/Lower/PowerPC/ppc-vec_any.f90 b/flang/test/Lower/PowerPC/ppc-vec_any.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/PowerPC/ppc-vec_any.f90 @@ -0,0 +1,219 @@ +! 
RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s +! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s +! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s +! REQUIRES: target=powerpc{{.*}} + +!---------------------- +! vec_any_ge +!---------------------- + +! CHECK-LABEL: vec_any_ge_test_i1 +subroutine vec_any_ge_test_i1(arg1, arg2) + vector(integer(1)), intent(in) :: arg1, arg2 + integer(4) :: r + r = vec_any_ge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[op:.*]] = arith.constant 3 : i32 +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsb.p(%[[op]], %[[arg2]], %[[arg1]]) fastmath : (i32, !fir.vector<16:i8>, !fir.vector<16:i8>) -> i32 + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(3 : i32) : i32 +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vcmpgtsb.p(%[[op]], %[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (i32, vector<16xi8>, vector<16xi8>) -> i32 + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtsb.p(i32 3, <16 x i8> %[[arg2]], <16 x i8> %[[arg1]]) +end subroutine vec_any_ge_test_i1 + +! CHECK-LABEL: vec_any_ge_test_i2 +subroutine vec_any_ge_test_i2(arg1, arg2) + vector(integer(2)), intent(in) :: arg1, arg2 + integer(4) :: r + r = vec_any_ge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[op:.*]] = arith.constant 3 : i32 +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsh.p(%[[op]], %[[arg2]], %[[arg1]]) fastmath : (i32, !fir.vector<8:i16>, !fir.vector<8:i16>) -> i32 + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(3 : i32) : i32 +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vcmpgtsh.p(%[[op]], %[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (i32, vector<8xi16>, vector<8xi16>) -> i32 + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtsh.p(i32 3, <8 x i16> %[[arg2]], <8 x i16> %[[arg1]]) +end subroutine vec_any_ge_test_i2 + +! CHECK-LABEL: vec_any_ge_test_i4 +subroutine vec_any_ge_test_i4(arg1, arg2) + vector(integer(4)), intent(in) :: arg1, arg2 + integer(4) :: r + r = vec_any_ge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[op:.*]] = arith.constant 3 : i32 +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsw.p(%[[op]], %[[arg2]], %[[arg1]]) fastmath : (i32, !fir.vector<4:i32>, !fir.vector<4:i32>) -> i32 + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(3 : i32) : i32 +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vcmpgtsw.p(%[[op]], %[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (i32, vector<4xi32>, vector<4xi32>) -> i32 + +! 
CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtsw.p(i32 3, <4 x i32> %[[arg2]], <4 x i32> %[[arg1]]) +end subroutine vec_any_ge_test_i4 + +! CHECK-LABEL: vec_any_ge_test_i8 +subroutine vec_any_ge_test_i8(arg1, arg2) + vector(integer(8)), intent(in) :: arg1, arg2 + integer(4) :: r + r = vec_any_ge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[op:.*]] = arith.constant 3 : i32 +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsd.p(%[[op]], %[[arg2]], %[[arg1]]) fastmath : (i32, !fir.vector<2:i64>, !fir.vector<2:i64>) -> i32 + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(3 : i32) : i32 +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vcmpgtsd.p(%[[op]], %[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (i32, vector<2xi64>, vector<2xi64>) -> i32 + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtsd.p(i32 3, <2 x i64> %[[arg2]], <2 x i64> %[[arg1]]) +end subroutine vec_any_ge_test_i8 + +! CHECK-LABEL: vec_any_ge_test_u1 +subroutine vec_any_ge_test_u1(arg1, arg2) + vector(unsigned(1)), intent(in) :: arg1, arg2 + integer(4) :: r + r = vec_any_ge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[op:.*]] = arith.constant 3 : i32 +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtub.p(%[[op]], %[[arg2]], %[[arg1]]) fastmath : (i32, !fir.vector<16:ui8>, !fir.vector<16:ui8>) -> i32 + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(3 : i32) : i32 +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vcmpgtub.p(%[[op]], %[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (i32, vector<16xi8>, vector<16xi8>) -> i32 + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtub.p(i32 3, <16 x i8> %[[arg2]], <16 x i8> %[[arg1]]) +end subroutine vec_any_ge_test_u1 + +! CHECK-LABEL: vec_any_ge_test_u2 +subroutine vec_any_ge_test_u2(arg1, arg2) + vector(unsigned(2)), intent(in) :: arg1, arg2 + integer(4) :: r + r = vec_any_ge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[op:.*]] = arith.constant 3 : i32 +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtuh.p(%[[op]], %[[arg2]], %[[arg1]]) fastmath : (i32, !fir.vector<8:ui16>, !fir.vector<8:ui16>) -> i32 + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(3 : i32) : i32 +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vcmpgtuh.p(%[[op]], %[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (i32, vector<8xi16>, vector<8xi16>) -> i32 + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! 
CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtuh.p(i32 3, <8 x i16> %[[arg2]], <8 x i16> %[[arg1]]) +end subroutine vec_any_ge_test_u2 + +! CHECK-LABEL: vec_any_ge_test_u4 +subroutine vec_any_ge_test_u4(arg1, arg2) + vector(unsigned(4)), intent(in) :: arg1, arg2 + integer(4) :: r + r = vec_any_ge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[op:.*]] = arith.constant 3 : i32 +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtuw.p(%[[op]], %[[arg2]], %[[arg1]]) fastmath : (i32, !fir.vector<4:ui32>, !fir.vector<4:ui32>) -> i32 + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(3 : i32) : i32 +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vcmpgtuw.p(%[[op]], %[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (i32, vector<4xi32>, vector<4xi32>) -> i32 + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtuw.p(i32 3, <4 x i32> %[[arg2]], <4 x i32> %[[arg1]]) +end subroutine vec_any_ge_test_u4 + +! CHECK-LABEL: vec_any_ge_test_u8 +subroutine vec_any_ge_test_u8(arg1, arg2) + vector(unsigned(8)), intent(in) :: arg1, arg2 + integer(4) :: r + r = vec_any_ge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[op:.*]] = arith.constant 3 : i32 +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtud.p(%[[op]], %[[arg2]], %[[arg1]]) fastmath : (i32, !fir.vector<2:ui64>, !fir.vector<2:ui64>) -> i32 + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(3 : i32) : i32 +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vcmpgtud.p(%[[op]], %[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (i32, vector<2xi64>, vector<2xi64>) -> i32 + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtud.p(i32 3, <2 x i64> %[[arg2]], <2 x i64> %[[arg1]]) +end subroutine vec_any_ge_test_u8 + +! CHECK-LABEL: vec_any_ge_test_r4 +subroutine vec_any_ge_test_r4(arg1, arg2) + vector(real(4)), intent(in) :: arg1, arg2 + integer(4) :: r + r = vec_any_ge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[op:.*]] = arith.constant 1 : i32 +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgesp.p(%[[op]], %[[arg1]], %[[arg2]]) fastmath : (i32, !fir.vector<4:f32>, !fir.vector<4:f32>) -> i32 + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(1 : i32) : i32 +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.vsx.xvcmpgesp.p(%[[op]], %[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath} : (i32, vector<4xf32>, vector<4xf32>) -> i32 + +! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! 
CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.vsx.xvcmpgesp.p(i32 1, <4 x float> %[[arg1]], <4 x float> %[[arg2]]) +end subroutine vec_any_ge_test_r4 + +! CHECK-LABEL: vec_any_ge_test_r8 +subroutine vec_any_ge_test_r8(arg1, arg2) + vector(real(8)), intent(in) :: arg1, arg2 + integer(4) :: r + r = vec_any_ge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[op:.*]] = arith.constant 1 : i32 +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgedp.p(%[[op]], %[[arg1]], %[[arg2]]) fastmath : (i32, !fir.vector<2:f64>, !fir.vector<2:f64>) -> i32 + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(1 : i32) : i32 +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.vsx.xvcmpgedp.p(%[[op]], %[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath} : (i32, vector<2xf64>, vector<2xf64>) -> i32 + +! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.vsx.xvcmpgedp.p(i32 1, <2 x double> %[[arg1]], <2 x double> %[[arg2]]) +end subroutine vec_any_ge_test_r8 + diff --git a/flang/test/Lower/PowerPC/ppc-vec_cmp.f90 b/flang/test/Lower/PowerPC/ppc-vec_cmp.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/PowerPC/ppc-vec_cmp.f90 @@ -0,0 +1,827 @@ +! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s +! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s +! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s +! REQUIRES: target=powerpc{{.*}} + +!---------------------- +! vec_cmpge +!---------------------- + +! CHECK-LABEL: vec_cmpge_test_i8 +subroutine vec_cmpge_test_i8(arg1, arg2) + vector(integer(8)) :: arg1, arg2 + vector(unsigned(8)) :: r + r = vec_cmpge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i64 +! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i64 to vector<2xi64> +! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtsd(%[[arg2]], %[[arg1]]) fastmath : (!fir.vector<2:i64>, !fir.vector<2:i64>) -> !fir.vector<2:ui64> +! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<2xi64> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<2xi64>) -> !fir.vector<2:ui64> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i64) : i64 +! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi64>) : vector<2xi64> +! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtsd(%[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]] : vector<2xi64> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[res:.*]] = call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %[[arg2]], <2 x i64> %[[arg1]]) +! CHECK: %{{[0-9]+}} = xor <2 x i64> %[[res]], +end subroutine vec_cmpge_test_i8 + +! 
CHECK-LABEL: vec_cmpge_test_i4 +subroutine vec_cmpge_test_i4(arg1, arg2) + vector(integer(4)) :: arg1, arg2 + vector(unsigned(4)) :: r + r = vec_cmpge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i32 +! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i32 to vector<4xi32> +! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtsw(%[[arg2]], %[[arg1]]) fastmath : (!fir.vector<4:i32>, !fir.vector<4:i32>) -> !fir.vector<4:ui32> +! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:ui32>) -> vector<4xi32> +! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<4xi32> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<4xi32>) -> !fir.vector<4:ui32> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i32) : i32 +! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<4xi32>) : vector<4xi32> +! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtsw(%[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]] : vector<4xi32> + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vcmpgtsw(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]]) +! CHECK: %{{[0-9]+}} = xor <4 x i32> %[[res]], +end subroutine vec_cmpge_test_i4 + +! CHECK-LABEL: vec_cmpge_test_i2 +subroutine vec_cmpge_test_i2(arg1, arg2) + vector(integer(2)) :: arg1, arg2 + vector(unsigned(2)) :: r + r = vec_cmpge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i16 +! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i16 to vector<8xi16> +! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtsh(%[[arg2]], %[[arg1]]) fastmath : (!fir.vector<8:i16>, !fir.vector<8:i16>) -> !fir.vector<8:ui16> +! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<8:ui16>) -> vector<8xi16> +! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<8xi16> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<8xi16>) -> !fir.vector<8:ui16> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i16) : i16 +! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<8xi16>) : vector<8xi16> +! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtsh(%[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]] : vector<8xi16> + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[res:.*]] = call <8 x i16> @llvm.ppc.altivec.vcmpgtsh(<8 x i16> %[[arg2]], <8 x i16> %[[arg1]]) +! CHECK: %{{[0-9]+}} = xor <8 x i16> %[[res]], +end subroutine vec_cmpge_test_i2 + +! CHECK-LABEL: vec_cmpge_test_i1 +subroutine vec_cmpge_test_i1(arg1, arg2) + vector(integer(1)) :: arg1, arg2 + vector(unsigned(1)) :: r + r = vec_cmpge(arg1, arg2) + +! 
CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8> +! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtsb(%[[arg2]], %[[arg1]]) fastmath : (!fir.vector<16:i8>, !fir.vector<16:i8>) -> !fir.vector<16:ui8> +! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<16xi8> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<16xi8>) -> !fir.vector<16:ui8> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i8) : i8 +! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtsb(%[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]] : vector<16xi8> + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[res:.*]] = call <16 x i8> @llvm.ppc.altivec.vcmpgtsb(<16 x i8> %[[arg2]], <16 x i8> %[[arg1]]) +! CHECK: %{{[0-9]+}} = xor <16 x i8> %[[res]], +end subroutine vec_cmpge_test_i1 + +! CHECK-LABEL: vec_cmpge_test_u8 +subroutine vec_cmpge_test_u8(arg1, arg2) + vector(unsigned(8)) :: arg1, arg2 + vector(unsigned(8)) :: r + r = vec_cmpge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i64 +! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i64 to vector<2xi64> +! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtud(%[[arg2]], %[[arg1]]) fastmath : (!fir.vector<2:ui64>, !fir.vector<2:ui64>) -> !fir.vector<2:ui64> +! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<2xi64> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<2xi64>) -> !fir.vector<2:ui64> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i64) : i64 +! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi64>) : vector<2xi64> +! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtud(%[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]] : vector<2xi64> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[res:.*]] = call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %[[arg2]], <2 x i64> %[[arg1]]) +! CHECK: %{{[0-9]+}} = xor <2 x i64> %[[res]], +end subroutine vec_cmpge_test_u8 + +! CHECK-LABEL: vec_cmpge_test_u4 +subroutine vec_cmpge_test_u4(arg1, arg2) + vector(unsigned(4)) :: arg1, arg2 + vector(unsigned(4)) :: r + r = vec_cmpge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i32 +! 
CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i32 to vector<4xi32> +! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtuw(%[[arg2]], %[[arg1]]) fastmath : (!fir.vector<4:ui32>, !fir.vector<4:ui32>) -> !fir.vector<4:ui32> +! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:ui32>) -> vector<4xi32> +! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<4xi32> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<4xi32>) -> !fir.vector<4:ui32> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i32) : i32 +! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<4xi32>) : vector<4xi32> +! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtuw(%[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]] : vector<4xi32> + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vcmpgtuw(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]]) +! CHECK: %{{[0-9]+}} = xor <4 x i32> %[[res]], +end subroutine vec_cmpge_test_u4 + +! CHECK-LABEL: vec_cmpge_test_u2 +subroutine vec_cmpge_test_u2(arg1, arg2) + vector(unsigned(2)) :: arg1, arg2 + vector(unsigned(2)) :: r + r = vec_cmpge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i16 +! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i16 to vector<8xi16> +! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtuh(%[[arg2]], %[[arg1]]) fastmath : (!fir.vector<8:ui16>, !fir.vector<8:ui16>) -> !fir.vector<8:ui16> +! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<8:ui16>) -> vector<8xi16> +! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<8xi16> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<8xi16>) -> !fir.vector<8:ui16> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i16) : i16 +! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<8xi16>) : vector<8xi16> +! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtuh(%[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]] : vector<8xi16> + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[res:.*]] = call <8 x i16> @llvm.ppc.altivec.vcmpgtuh(<8 x i16> %[[arg2]], <8 x i16> %[[arg1]]) +! CHECK: %{{[0-9]+}} = xor <8 x i16> %[[res]], +end subroutine vec_cmpge_test_u2 + +! CHECK-LABEL: vec_cmpge_test_u1 +subroutine vec_cmpge_test_u1(arg1, arg2) + vector(unsigned(1)) :: arg1, arg2 + vector(unsigned(1)) :: r + r = vec_cmpge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8> +! 
CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtub(%[[arg2]], %[[arg1]]) fastmath : (!fir.vector<16:ui8>, !fir.vector<16:ui8>) -> !fir.vector<16:ui8> +! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<16xi8> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<16xi8>) -> !fir.vector<16:ui8> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i8) : i8 +! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtub(%[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]] : vector<16xi8> + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[res:.*]] = call <16 x i8> @llvm.ppc.altivec.vcmpgtub(<16 x i8> %[[arg2]], <16 x i8> %[[arg1]]) +! CHECK: %{{[0-9]+}} = xor <16 x i8> %[[res]], +end subroutine vec_cmpge_test_u1 + +subroutine vec_cmpge_test_r4(arg1, arg2) + vector(real(4)) :: arg1, arg2 + vector(unsigned(4)) :: r + r = vec_cmpge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgesp(%[[arg1]], %[[arg2]]) fastmath : (!fir.vector<4:f32>, !fir.vector<4:f32>) -> !fir.vector<4:ui32> + +! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %[[arg1]], <4 x float> %[[arg2]]) +end subroutine vec_cmpge_test_r4 + +subroutine vec_cmpge_test_r8(arg1, arg2) + vector(real(8)) :: arg1, arg2 + vector(unsigned(8)) :: r + r = vec_cmpge(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgedp(%[[arg1]], %[[arg2]]) fastmath : (!fir.vector<2:f64>, !fir.vector<2:f64>) -> !fir.vector<2:ui64> + +! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %[[arg1]], <2 x double> %[[arg2]]) +end subroutine vec_cmpge_test_r8 + +!---------------------- +! vec_cmpgt +!---------------------- + +! CHECK-LABEL: vec_cmpgt_test_i1 +subroutine vec_cmpgt_test_i1(arg1, arg2) + vector(integer(1)) :: arg1, arg2 + vector(unsigned(1)) :: r + r = vec_cmpgt(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsb(%[[arg1]], %[[arg2]]) fastmath : (!fir.vector<16:i8>, !fir.vector<16:i8>) -> !fir.vector<16:ui8> + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call <16 x i8> @llvm.ppc.altivec.vcmpgtsb(<16 x i8> %[[arg1]], <16 x i8> %[[arg2]]) +end subroutine vec_cmpgt_test_i1 + +! 
CHECK-LABEL: vec_cmpgt_test_i2 +subroutine vec_cmpgt_test_i2(arg1, arg2) + vector(integer(2)) :: arg1, arg2 + vector(unsigned(2)) :: r + r = vec_cmpgt(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsh(%[[arg1]], %[[arg2]]) fastmath : (!fir.vector<8:i16>, !fir.vector<8:i16>) -> !fir.vector<8:ui16> + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call <8 x i16> @llvm.ppc.altivec.vcmpgtsh(<8 x i16> %[[arg1]], <8 x i16> %[[arg2]]) +end subroutine vec_cmpgt_test_i2 + +! CHECK-LABEL: vec_cmpgt_test_i4 +subroutine vec_cmpgt_test_i4(arg1, arg2) + vector(integer(4)) :: arg1, arg2 + vector(unsigned(4)) :: r + r = vec_cmpgt(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsw(%[[arg1]], %[[arg2]]) fastmath : (!fir.vector<4:i32>, !fir.vector<4:i32>) -> !fir.vector<4:ui32> + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vcmpgtsw(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]]) +end subroutine vec_cmpgt_test_i4 + +! CHECK-LABEL: vec_cmpgt_test_i8 +subroutine vec_cmpgt_test_i8(arg1, arg2) + vector(integer(8)) :: arg1, arg2 + vector(unsigned(8)) :: r + r = vec_cmpgt(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsd(%[[arg1]], %[[arg2]]) fastmath : (!fir.vector<2:i64>, !fir.vector<2:i64>) -> !fir.vector<2:ui64> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %[[arg1]], <2 x i64> %[[arg2]]) +end subroutine vec_cmpgt_test_i8 + +! CHECK-LABEL: vec_cmpgt_test_u1 +subroutine vec_cmpgt_test_u1(arg1, arg2) + vector(unsigned(1)) :: arg1, arg2 + vector(unsigned(1)) :: r + r = vec_cmpgt(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtub(%[[arg1]], %[[arg2]]) fastmath : (!fir.vector<16:ui8>, !fir.vector<16:ui8>) -> !fir.vector<16:ui8> + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call <16 x i8> @llvm.ppc.altivec.vcmpgtub(<16 x i8> %[[arg1]], <16 x i8> %[[arg2]]) +end subroutine vec_cmpgt_test_u1 + +! CHECK-LABEL: vec_cmpgt_test_u2 +subroutine vec_cmpgt_test_u2(arg1, arg2) + vector(unsigned(2)) :: arg1, arg2 + vector(unsigned(2)) :: r + r = vec_cmpgt(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtuh(%[[arg1]], %[[arg2]]) fastmath : (!fir.vector<8:ui16>, !fir.vector<8:ui16>) -> !fir.vector<8:ui16> + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! 
CHECK: %{{[0-9]+}} = call <8 x i16> @llvm.ppc.altivec.vcmpgtuh(<8 x i16> %[[arg1]], <8 x i16> %[[arg2]])
+end subroutine vec_cmpgt_test_u2
+
+! CHECK-LABEL: vec_cmpgt_test_u4
+subroutine vec_cmpgt_test_u4(arg1, arg2)
+  vector(unsigned(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmpgt(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtuw(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<4:ui32>, !fir.vector<4:ui32>) -> !fir.vector<4:ui32>
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vcmpgtuw(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
+end subroutine vec_cmpgt_test_u4
+
+! CHECK-LABEL: vec_cmpgt_test_u8
+subroutine vec_cmpgt_test_u8(arg1, arg2)
+  vector(unsigned(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmpgt(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtud(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<2:ui64>, !fir.vector<2:ui64>) -> !fir.vector<2:ui64>
+
+! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %[[arg1]], <2 x i64> %[[arg2]])
+end subroutine vec_cmpgt_test_u8
+
+! CHECK-LABEL: vec_cmpgt_test_r4
+subroutine vec_cmpgt_test_r4(arg1, arg2)
+  vector(real(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmpgt(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgtsp(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<4:f32>, !fir.vector<4:f32>) -> !fir.vector<4:ui32>
+
+! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %[[arg1]], <4 x float> %[[arg2]])
+end subroutine vec_cmpgt_test_r4
+
+! CHECK-LABEL: vec_cmpgt_test_r8
+subroutine vec_cmpgt_test_r8(arg1, arg2)
+  vector(real(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmpgt(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgtdp(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<2:f64>, !fir.vector<2:f64>) -> !fir.vector<2:ui64>
+
+! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %[[arg1]], <2 x double> %[[arg2]])
+end subroutine vec_cmpgt_test_r8
+
+!----------------------
+! vec_cmple
+!----------------------
+
+! CHECK-LABEL: vec_cmple_test_i8
+subroutine vec_cmple_test_i8(arg1, arg2)
+  vector(integer(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i64
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i64 to vector<2xi64>
+! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtsd(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<2:i64>, !fir.vector<2:i64>) -> !fir.vector<2:ui64>
+! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<2:ui64>) -> vector<2xi64>
+! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<2xi64>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<2xi64>) -> !fir.vector<2:ui64>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i64) : i64
+! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi64>) : vector<2xi64>
+! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtsd(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]] : vector<2xi64>
+
+! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %[[res:.*]] = call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %[[arg1]], <2 x i64> %[[arg2]])
+! CHECK: %{{[0-9]+}} = xor <2 x i64> %[[res]], <i64 -1, i64 -1>
+end subroutine vec_cmple_test_i8
+
+! CHECK-LABEL: vec_cmple_test_i4
+subroutine vec_cmple_test_i4(arg1, arg2)
+  vector(integer(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i32
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i32 to vector<4xi32>
+! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtsw(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<4:i32>, !fir.vector<4:i32>) -> !fir.vector<4:ui32>
+! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:ui32>) -> vector<4xi32>
+! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<4xi32>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<4xi32>) -> !fir.vector<4:ui32>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i32) : i32
+! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<4xi32>) : vector<4xi32>
+! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtsw(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]] : vector<4xi32>
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vcmpgtsw(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
+! CHECK: %{{[0-9]+}} = xor <4 x i32> %[[res]], <i32 -1, i32 -1, i32 -1, i32 -1>
+end subroutine vec_cmple_test_i4
+
+! CHECK-LABEL: vec_cmple_test_i2
+subroutine vec_cmple_test_i2(arg1, arg2)
+  vector(integer(2)) :: arg1, arg2
+  vector(unsigned(2)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i16
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i16 to vector<8xi16>
+! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtsh(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<8:i16>, !fir.vector<8:i16>) -> !fir.vector<8:ui16>
+! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<8:ui16>) -> vector<8xi16>
+! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<8xi16>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<8xi16>) -> !fir.vector<8:ui16>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i16) : i16
+! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<8xi16>) : vector<8xi16>
+! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtsh(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]] : vector<8xi16>
+
+! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %[[res:.*]] = call <8 x i16> @llvm.ppc.altivec.vcmpgtsh(<8 x i16> %[[arg1]], <8 x i16> %[[arg2]])
+! CHECK: %{{[0-9]+}} = xor <8 x i16> %[[res]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+end subroutine vec_cmple_test_i2
+
+! CHECK-LABEL: vec_cmple_test_i1
+subroutine vec_cmple_test_i1(arg1, arg2)
+  vector(integer(1)) :: arg1, arg2
+  vector(unsigned(1)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
+! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtsb(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<16:i8>, !fir.vector<16:i8>) -> !fir.vector<16:ui8>
+! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<16:ui8>) -> vector<16xi8>
+! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<16xi8>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i8) : i8
+! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
+! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtsb(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]] : vector<16xi8>
+
+! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %[[res:.*]] = call <16 x i8> @llvm.ppc.altivec.vcmpgtsb(<16 x i8> %[[arg1]], <16 x i8> %[[arg2]])
+! CHECK: %{{[0-9]+}} = xor <16 x i8> %[[res]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+end subroutine vec_cmple_test_i1
+
+! CHECK-LABEL: vec_cmple_test_u8
+subroutine vec_cmple_test_u8(arg1, arg2)
+  vector(unsigned(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i64
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i64 to vector<2xi64>
+! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtud(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<2:ui64>, !fir.vector<2:ui64>) -> !fir.vector<2:ui64>
+! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<2:ui64>) -> vector<2xi64>
+! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<2xi64>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<2xi64>) -> !fir.vector<2:ui64>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i64) : i64
+! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi64>) : vector<2xi64>
+! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtud(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]] : vector<2xi64>
+
+! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %[[res:.*]] = call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %[[arg1]], <2 x i64> %[[arg2]])
+! CHECK: %{{[0-9]+}} = xor <2 x i64> %[[res]], <i64 -1, i64 -1>
+end subroutine vec_cmple_test_u8
+
+! CHECK-LABEL: vec_cmple_test_u4
+subroutine vec_cmple_test_u4(arg1, arg2)
+  vector(unsigned(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i32
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i32 to vector<4xi32>
+! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtuw(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<4:ui32>, !fir.vector<4:ui32>) -> !fir.vector<4:ui32>
+! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:ui32>) -> vector<4xi32>
+! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<4xi32>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<4xi32>) -> !fir.vector<4:ui32>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i32) : i32
+! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<4xi32>) : vector<4xi32>
+! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtuw(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]] : vector<4xi32>
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vcmpgtuw(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
+! CHECK: %{{[0-9]+}} = xor <4 x i32> %[[res]], <i32 -1, i32 -1, i32 -1, i32 -1>
+end subroutine vec_cmple_test_u4
+
+! CHECK-LABEL: vec_cmple_test_u2
+subroutine vec_cmple_test_u2(arg1, arg2)
+  vector(unsigned(2)) :: arg1, arg2
+  vector(unsigned(2)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i16
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i16 to vector<8xi16>
+! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtuh(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<8:ui16>, !fir.vector<8:ui16>) -> !fir.vector<8:ui16>
+! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<8:ui16>) -> vector<8xi16>
+! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<8xi16>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<8xi16>) -> !fir.vector<8:ui16>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i16) : i16
+! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<8xi16>) : vector<8xi16>
+! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtuh(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]] : vector<8xi16>
+
+! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %[[res:.*]] = call <8 x i16> @llvm.ppc.altivec.vcmpgtuh(<8 x i16> %[[arg1]], <8 x i16> %[[arg2]])
+! CHECK: %{{[0-9]+}} = xor <8 x i16> %[[res]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+end subroutine vec_cmple_test_u2
+
+! CHECK-LABEL: vec_cmple_test_u1
+subroutine vec_cmple_test_u1(arg1, arg2)
+  vector(unsigned(1)) :: arg1, arg2
+  vector(unsigned(1)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
+! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
+! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
+! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtub(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<16:ui8>, !fir.vector<16:ui8>) -> !fir.vector<16:ui8>
+! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<16:ui8>) -> vector<16xi8>
+! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<16xi8>
+! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
+! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i8) : i8
+! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
+! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtub(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8>
+! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]] : vector<16xi8>
+
+! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %[[res:.*]] = call <16 x i8> @llvm.ppc.altivec.vcmpgtub(<16 x i8> %[[arg1]], <16 x i8> %[[arg2]])
+! CHECK: %{{[0-9]+}} = xor <16 x i8> %[[res]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+end subroutine vec_cmple_test_u1
+
+! CHECK-LABEL: vec_cmple_test_r4
+subroutine vec_cmple_test_r4(arg1, arg2)
+  vector(real(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgesp(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<4:f32>, !fir.vector<4:f32>) -> !fir.vector<4:ui32>
+
+! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %[[arg2]], <4 x float> %[[arg1]])
+end subroutine vec_cmple_test_r4
+
+! CHECK-LABEL: vec_cmple_test_r8
+subroutine vec_cmple_test_r8(arg1, arg2)
+  vector(real(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgedp(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<2:f64>, !fir.vector<2:f64>) -> !fir.vector<2:ui64>
+
+! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %[[arg2]], <2 x double> %[[arg1]])
+end subroutine vec_cmple_test_r8
+
+!----------------------
+! vec_cmplt
+!----------------------
+
+! CHECK-LABEL: vec_cmplt_test_i1
+subroutine vec_cmplt_test_i1(arg1, arg2)
+  vector(integer(1)) :: arg1, arg2
+  vector(unsigned(1)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsb(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<16:i8>, !fir.vector<16:i8>) -> !fir.vector<16:ui8>
+
+! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call <16 x i8> @llvm.ppc.altivec.vcmpgtsb(<16 x i8> %[[arg2]], <16 x i8> %[[arg1]])
+end subroutine vec_cmplt_test_i1
+
+! CHECK-LABEL: vec_cmplt_test_i2
+subroutine vec_cmplt_test_i2(arg1, arg2)
+  vector(integer(2)) :: arg1, arg2
+  vector(unsigned(2)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsh(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<8:i16>, !fir.vector<8:i16>) -> !fir.vector<8:ui16>
+
+! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call <8 x i16> @llvm.ppc.altivec.vcmpgtsh(<8 x i16> %[[arg2]], <8 x i16> %[[arg1]])
+end subroutine vec_cmplt_test_i2
+
+! CHECK-LABEL: vec_cmplt_test_i4
+subroutine vec_cmplt_test_i4(arg1, arg2)
+  vector(integer(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsw(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<4:i32>, !fir.vector<4:i32>) -> !fir.vector<4:ui32>
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vcmpgtsw(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]])
+end subroutine vec_cmplt_test_i4
+
+! CHECK-LABEL: vec_cmplt_test_i8
+subroutine vec_cmplt_test_i8(arg1, arg2)
+  vector(integer(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsd(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<2:i64>, !fir.vector<2:i64>) -> !fir.vector<2:ui64>
+
+! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %[[arg2]], <2 x i64> %[[arg1]])
+end subroutine vec_cmplt_test_i8
+
+! CHECK-LABEL: vec_cmplt_test_u1
+subroutine vec_cmplt_test_u1(arg1, arg2)
+  vector(unsigned(1)) :: arg1, arg2
+  vector(unsigned(1)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtub(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<16:ui8>, !fir.vector<16:ui8>) -> !fir.vector<16:ui8>
+
+! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call <16 x i8> @llvm.ppc.altivec.vcmpgtub(<16 x i8> %[[arg2]], <16 x i8> %[[arg1]])
+end subroutine vec_cmplt_test_u1
+
+! CHECK-LABEL: vec_cmplt_test_u2
+subroutine vec_cmplt_test_u2(arg1, arg2)
+  vector(unsigned(2)) :: arg1, arg2
+  vector(unsigned(2)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtuh(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<8:ui16>, !fir.vector<8:ui16>) -> !fir.vector<8:ui16>
+
+! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call <8 x i16> @llvm.ppc.altivec.vcmpgtuh(<8 x i16> %[[arg2]], <8 x i16> %[[arg1]])
+end subroutine vec_cmplt_test_u2
+
+! CHECK-LABEL: vec_cmplt_test_u4
+subroutine vec_cmplt_test_u4(arg1, arg2)
+  vector(unsigned(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtuw(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<4:ui32>, !fir.vector<4:ui32>) -> !fir.vector<4:ui32>
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vcmpgtuw(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]])
+end subroutine vec_cmplt_test_u4
+
+! CHECK-LABEL: vec_cmplt_test_u8
+subroutine vec_cmplt_test_u8(arg1, arg2)
+  vector(unsigned(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtud(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<2:ui64>, !fir.vector<2:ui64>) -> !fir.vector<2:ui64>
+
+! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %[[arg2]], <2 x i64> %[[arg1]])
+end subroutine vec_cmplt_test_u8
+
+! CHECK-LABEL: vec_cmplt_test_r4
+subroutine vec_cmplt_test_r4(arg1, arg2)
+  vector(real(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgtsp(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<4:f32>, !fir.vector<4:f32>) -> !fir.vector<4:ui32>
+
+! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %[[arg2]], <4 x float> %[[arg1]])
+end subroutine vec_cmplt_test_r4
+
+! CHECK-LABEL: vec_cmplt_test_r8
+subroutine vec_cmplt_test_r8(arg1, arg2)
+  vector(real(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
+! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgtdp(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<2:f64>, !fir.vector<2:f64>) -> !fir.vector<2:ui64>
+
+! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %[[arg2]], <2 x double> %[[arg1]])
+end subroutine vec_cmplt_test_r8
+