Index: flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h =================================================================== --- flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h +++ flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h @@ -17,7 +17,22 @@ /// Enums used to templatize vector intrinsic function generators. Enum does /// not contain every vector intrinsic, only intrinsics that share generators. -enum class VecOp { Add, And, Anyge, Cmpge, Cmpgt, Cmple, Cmplt, Mul, Sub, Xor }; +enum class VecOp { + Abs, + Add, + And, + Anyge, + Cmpge, + Cmpgt, + Cmple, + Cmplt, + Mul, + Msub, + Nmadd, + Sel, + Sub, + Xor +}; // Wrapper struct to encapsulate information for a vector type. Preserves // sign of eleTy if eleTy is signed/unsigned integer. Helps with vector type @@ -72,6 +87,20 @@ return newArgs; } +// This overload method is used only if arguments are of different types. +static inline llvm::SmallVector +convertVecArgs(fir::FirOpBuilder &builder, mlir::Location loc, + llvm::SmallVectorImpl &vecTyInfo, + llvm::SmallVector args) { + llvm::SmallVector newArgs; + for (size_t i = 0; i < args.size(); i++) { + mlir::Type ty{vecTyInfo[i].toMlirVectorType(builder.getContext())}; + assert(ty && "unknown mlir vector type"); + newArgs.push_back(builder.createConvert(loc, ty, args[i])); + } + return newArgs; +} + struct PPCIntrinsicLibrary : IntrinsicLibrary { // Constructors. 
@@ -84,6 +113,9 @@ template void genMtfsf(llvm::ArrayRef); + fir::ExtendedValue genVecAbs(mlir::Type resultType, + llvm::ArrayRef args); + template fir::ExtendedValue genVecAddAndMulSubXor(mlir::Type resultType, @@ -96,6 +128,13 @@ template fir::ExtendedValue genVecAnyCompare(mlir::Type resultType, llvm::ArrayRef args); + + template + fir::ExtendedValue genVecNmaddMsub(mlir::Type resultType, + llvm::ArrayRef args); + + fir::ExtendedValue genVecSel(mlir::Type resultType, + llvm::ArrayRef args); }; const IntrinsicHandler *findPPCIntrinsicHandler(llvm::StringRef name); Index: flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp =================================================================== --- flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp +++ flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp @@ -33,6 +33,10 @@ static_cast(&PI::genMtfsf), {{{"bf", asValue}, {"i", asValue}}}, /*isElemental=*/false}, + {"__ppc_vec_abs", + static_cast(&PI::genVecAbs), + {{{"arg1", asValue}}}, + /*isElemental=*/true}, {"__ppc_vec_add", static_cast( &PI::genVecAddAndMulSubXor), @@ -68,11 +72,25 @@ &PI::genVecCmp), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, + {"__ppc_vec_msub", + static_cast( + &PI::genVecNmaddMsub), + {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, + /*isElemental=*/true}, {"__ppc_vec_mul", static_cast( &PI::genVecAddAndMulSubXor), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, + {"__ppc_vec_nmadd", + static_cast( + &PI::genVecNmaddMsub), + {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, + /*isElemental=*/true}, + {"__ppc_vec_sel", + static_cast(&PI::genVecSel), + {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, + /*isElemental=*/true}, {"__ppc_vec_sub", static_cast( &PI::genVecAddAndMulSubXor), @@ -282,6 +300,79 @@ builder.create(loc, funcOp, scalarArgs); } +// VEC_ABS +fir::ExtendedValue +PPCIntrinsicLibrary::genVecAbs(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 
1); + auto context{builder.getContext()}; + auto argBases{getBasesForArgs(args)}; + auto vTypeInfo{getVecTypeFromFir(argBases[0])}; + + mlir::func::FuncOp funcOp{nullptr}; + mlir::FunctionType ftype; + llvm::StringRef fname{}; + if (vTypeInfo.isFloat()) { + if (vTypeInfo.isFloat32()) { + fname = "llvm.fabs.v4f32"; + ftype = + genFuncType, Ty::RealVector<4>>(context, builder); + } else if (vTypeInfo.isFloat64()) { + fname = "llvm.fabs.v2f64"; + ftype = + genFuncType, Ty::RealVector<8>>(context, builder); + } + + funcOp = builder.addNamedFunction(loc, fname, ftype); + auto callOp{builder.create(loc, funcOp, argBases[0])}; + return callOp.getResult(0); + } else if (auto eleTy = vTypeInfo.eleTy.dyn_cast()) { + // vec_abs(arg1) = max(0 - arg1, arg1) + + auto newVecTy{mlir::VectorType::get(vTypeInfo.len, eleTy)}; + auto varg1{builder.createConvert(loc, newVecTy, argBases[0])}; + // construct vector(0,..) + auto zeroVal{builder.createIntegerConstant(loc, eleTy, 0)}; + auto vZero{ + builder.create(loc, newVecTy, zeroVal)}; + auto zeroSubVarg1{builder.create(loc, vZero, varg1)}; + + mlir::func::FuncOp funcOp{nullptr}; + switch (eleTy.getWidth()) { + case 8: + fname = "llvm.ppc.altivec.vmaxsb"; + ftype = genFuncType, Ty::IntegerVector<1>, + Ty::IntegerVector<1>>(context, builder); + break; + case 16: + fname = "llvm.ppc.altivec.vmaxsh"; + ftype = genFuncType, Ty::IntegerVector<2>, + Ty::IntegerVector<2>>(context, builder); + break; + case 32: + fname = "llvm.ppc.altivec.vmaxsw"; + ftype = genFuncType, Ty::IntegerVector<4>, + Ty::IntegerVector<4>>(context, builder); + break; + case 64: + fname = "llvm.ppc.altivec.vmaxsd"; + ftype = genFuncType, Ty::IntegerVector<8>, + Ty::IntegerVector<8>>(context, builder); + break; + default: + llvm_unreachable("invalid integer size"); + } + funcOp = builder.addNamedFunction(loc, fname, ftype); + + mlir::Value args[] = {zeroSubVarg1, varg1}; + auto callOp{builder.create(loc, funcOp, args)}; + return builder.createConvert(loc, 
argBases[0].getType(), + callOp.getResult(0)); + } + + llvm_unreachable("unknown vector type"); +} + // VEC_ADD, VEC_AND, VEC_SUB, VEC_MUL, VEC_XOR template fir::ExtendedValue PPCIntrinsicLibrary::genVecAddAndMulSubXor( @@ -641,4 +732,87 @@ return res; } +// VEC_NMADD, VEC_MSUB +template +fir::ExtendedValue +PPCIntrinsicLibrary::genVecNmaddMsub(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 3); + auto context{builder.getContext()}; + auto argBases{getBasesForArgs(args)}; + auto vTypeInfo{getVecTypeFromFir(argBases[0])}; + auto newArgs{convertVecArgs(builder, loc, vTypeInfo, argBases)}; + const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()}; + + static std::map> fmaMap{ + {32, + std::make_pair( + "llvm.fma.v4f32", + genFuncType, Ty::RealVector<4>, Ty::RealVector<4>>( + context, builder))}, + {64, + std::make_pair( + "llvm.fma.v2f64", + genFuncType, Ty::RealVector<8>, Ty::RealVector<8>>( + context, builder))}}; + + auto funcOp{builder.addNamedFunction(loc, std::get<0>(fmaMap[width]), + std::get<1>(fmaMap[width]))}; + if (vop == VecOp::Nmadd) { + // vec_nmadd(arg1, arg2, arg3) = -fma(arg1, arg2, arg3) + auto callOp{builder.create(loc, funcOp, newArgs)}; + + // We need to convert fir.vector to MLIR vector to use fneg and then back + // to fir.vector to store. 
+ auto vCall{builder.createConvert(loc, vTypeInfo.toMlirVectorType(context), + callOp.getResult(0))}; + auto neg{builder.create(loc, vCall)}; + return builder.createConvert(loc, vTypeInfo.toFirVectorType(), neg); + } else if (vop == VecOp::Msub) { + // vec_msub(arg1, arg2, arg3) = fma(arg1, arg2, -arg3) + newArgs[2] = builder.create(loc, newArgs[2]); + + auto callOp{builder.create(loc, funcOp, newArgs)}; + return callOp.getResult(0); + } + llvm_unreachable("Invalid vector operation for generator"); +} + +// VEC_SEL +fir::ExtendedValue +PPCIntrinsicLibrary::genVecSel(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 3); + auto argBases{getBasesForArgs(args)}; + llvm::SmallVector vecTyInfos; + for (size_t i = 0; i < argBases.size(); i++) { + vecTyInfos.push_back(getVecTypeFromFir(argBases[i])); + } + auto vargs{convertVecArgs(builder, loc, vecTyInfos, argBases)}; + + auto i8Ty{mlir::IntegerType::get(builder.getContext(), 8)}; + auto negOne{builder.createIntegerConstant(loc, i8Ty, -1)}; + + // construct a constant <16 x i8> vector with value -1 for bitcast + auto bcVecTy{mlir::VectorType::get(16, i8Ty)}; + auto vNegOne{builder.create(loc, bcVecTy, negOne)}; + + // bitcast arguments to bcVecTy + auto arg1{builder.create(loc, bcVecTy, vargs[0])}; + auto arg2{builder.create(loc, bcVecTy, vargs[1])}; + auto arg3{builder.create(loc, bcVecTy, vargs[2])}; + + // vec_sel(arg1, arg2, arg3) = + // (arg2 and arg3) or (arg1 and (arg3 xor vector(-1,...))) + auto comp{builder.create(loc, arg3, vNegOne)}; + auto a1AndComp{builder.create(loc, arg1, comp)}; + auto a1OrA2{builder.create(loc, arg2, arg3)}; + auto res{builder.create(loc, a1AndComp, a1OrA2)}; + + auto bcRes{ + builder.create(loc, vargs[0].getType(), res)}; + + return builder.createConvert(loc, vecTyInfos[0].toFirVectorType(), bcRes); +} + } // namespace fir Index: flang/module/__ppc_intrinsics.f90 =================================================================== --- 
flang/module/__ppc_intrinsics.f90 +++ flang/module/__ppc_intrinsics.f90 @@ -22,6 +22,25 @@ !-------------------- ! Vector intrinsic !-------------------- +!! ================ 1 argument function interface ================ +! vector(i) function f(vector(i)) +#define ELEM_FUNC_VIVI(VKIND) \ + elemental vector(integer(VKIND)) function elem_func_vi##VKIND##vi##VKIND(arg1); \ + vector(integer(VKIND)), intent(in) :: arg1; \ + end function ; + +! vector(r) function f(vector(r)) +#define ELEM_FUNC_VRVR(VKIND) \ + elemental vector(real(VKIND)) function elem_func_vr##VKIND##vr##VKIND(arg1); \ + vector(real(VKIND)), intent(in) :: arg1; \ + end function ; + + ELEM_FUNC_VIVI(1) ELEM_FUNC_VIVI(2) ELEM_FUNC_VIVI(4) ELEM_FUNC_VIVI(8) + ELEM_FUNC_VRVR(4) ELEM_FUNC_VRVR(8) + +#undef ELEM_FUNC_VIVI +#undef ELEM_FUNC_VRVR + !! ================ 2 arguments function interface ================ ! vector(i) function f(vector(i), vector(i)) #define ELEM_FUNC_VIVIVI(VKIND) \ @@ -96,9 +115,35 @@ vector(real(VKIND)), intent(in) :: arg1, arg2, arg3; \ end function ; +! vector(i) function f(vector(i), vector(i), vector(u)) +#define ELEM_FUNC_VIVIVIVU(VKIND) \ + elemental vector(integer(VKIND)) function elem_func_vi##VKIND##vi##VKIND##vi##VKIND##vu##VKIND(arg1, arg2, arg3); \ + vector(integer(VKIND)), intent(in) :: arg1, arg2; \ + vector(unsigned(VKIND)), intent(in) :: arg3; \ + end function ; + +! vector(u) function f(vector(u), vector(u), vector(u)) +#define ELEM_FUNC_VUVUVUVU(VKIND) \ + elemental vector(unsigned(VKIND)) function elem_func_vu##VKIND##vu##VKIND##vu##VKIND##vu##VKIND(arg1, arg2, arg3); \ + vector(unsigned(VKIND)), intent(in) :: arg1, arg2, arg3; \ + end function ; + +! 
vector(r) function f(vector(r), vector(r), vector(u)) +#define ELEM_FUNC_VRVRVRVU(VKIND) \ + elemental vector(real(VKIND)) function elem_func_vr##VKIND##vr##VKIND##vr##VKIND##vu##VKIND(arg1, arg2, arg3); \ + vector(real(VKIND)), intent(in) :: arg1, arg2; \ + vector(unsigned(VKIND)), intent(in) :: arg3; \ + end function ; + + ELEM_FUNC_VIVIVIVU(1) ELEM_FUNC_VIVIVIVU(2) ELEM_FUNC_VIVIVIVU(4) ELEM_FUNC_VIVIVIVU(8) + ELEM_FUNC_VUVUVUVU(1) ELEM_FUNC_VUVUVUVU(2) ELEM_FUNC_VUVUVUVU(4) ELEM_FUNC_VUVUVUVU(8) + ELEM_FUNC_VRVRVRVU(4) ELEM_FUNC_VRVRVRVU(8) ELEM_FUNC_VRVRVRVR(4) ELEM_FUNC_VRVRVRVR(8) #undef ELEM_FUNC_VRVRVRVR +#undef ELEM_FUNC_VRVRVRVU +#undef ELEM_FUNC_VUVUVUVU +#undef ELEM_FUNC_VIVIVIVU end interface @@ -257,6 +302,31 @@ end interface mtfsfi public :: mtfsfi +!------------------------- +! vector function(vector) +!------------------------- +#define VI_VI(NAME, VKIND) __ppc_##NAME##_vi##VKIND##vi##VKIND +#define VR_VR(NAME, VKIND) __ppc_##NAME##_vr##VKIND##vr##VKIND + +#define VEC_VI_VI(NAME, VKIND) \ + procedure(elem_func_vi##VKIND##vi##VKIND) :: VI_VI(NAME, VKIND); +#define VEC_VR_VR(NAME, VKIND) \ + procedure(elem_func_vr##VKIND##vr##VKIND) :: VR_VR(NAME, VKIND); + +! vec_abs + VEC_VI_VI(vec_abs,1) VEC_VI_VI(vec_abs,2) VEC_VI_VI(vec_abs,4) VEC_VI_VI(vec_abs,8) + VEC_VR_VR(vec_abs,4) VEC_VR_VR(vec_abs,8) + interface vec_abs + procedure :: VI_VI(vec_abs,1), VI_VI(vec_abs,2), VI_VI(vec_abs,4), VI_VI(vec_abs,8) + procedure :: VR_VR(vec_abs,4), VR_VR(vec_abs,8) + end interface vec_abs + public :: vec_abs + +#undef VEC_VR_VR +#undef VEC_VI_VI +#undef VR_VR +#undef VI_VI + !--------------------------------- ! vector function(vector, vector) !--------------------------------- @@ -413,9 +483,18 @@ ! 
vector function(vector, vector, vector) !----------------------------------------- #define VR_VR_VR_VR(NAME, VKIND) __ppc_##NAME##_vr##VKIND##vr##VKIND##vr##VKIND##vr##VKIND +#define VI_VI_VI_VU(NAME, VKIND) __ppc_##NAME##_vi##VKIND##vi##VKIND##vi##VKIND##vu##VKIND +#define VU_VU_VU_VU(NAME, VKIND) __ppc_##NAME##_vu##VKIND##vu##VKIND##vu##VKIND##vu##VKIND +#define VR_VR_VR_VU(NAME, VKIND) __ppc_##NAME##_vr##VKIND##vr##VKIND##vr##VKIND##vu##VKIND #define VEC_VR_VR_VR_VR(NAME, VKIND) \ procedure(elem_func_vr##VKIND##vr##VKIND##vr##VKIND##vr##VKIND) :: VR_VR_VR_VR(NAME, VKIND); +#define VEC_VI_VI_VI_VU(NAME, VKIND) \ + procedure(elem_func_vi##VKIND##vi##VKIND##vi##VKIND##vu##VKIND) :: VI_VI_VI_VU(NAME, VKIND); +#define VEC_VU_VU_VU_VU(NAME, VKIND) \ + procedure(elem_func_vu##VKIND##vu##VKIND##vu##VKIND##vu##VKIND) :: VU_VU_VU_VU(NAME, VKIND); +#define VEC_VR_VR_VR_VU(NAME, VKIND) \ + procedure(elem_func_vr##VKIND##vr##VKIND##vr##VKIND##vu##VKIND) :: VR_VR_VR_VU(NAME, VKIND); ! vec_madd VEC_VR_VR_VR_VR(vec_madd,4) VEC_VR_VR_VR_VR(vec_madd,8) @@ -424,6 +503,20 @@ end interface vec_madd public :: vec_madd +! vec_msub + VEC_VR_VR_VR_VR(vec_msub,4) VEC_VR_VR_VR_VR(vec_msub,8) + interface vec_msub + procedure :: VR_VR_VR_VR(vec_msub,4), VR_VR_VR_VR(vec_msub,8) + end interface vec_msub + public :: vec_msub + +! vec_nmadd + VEC_VR_VR_VR_VR(vec_nmadd,4) VEC_VR_VR_VR_VR(vec_nmadd,8) + interface vec_nmadd + procedure :: VR_VR_VR_VR(vec_nmadd,4), VR_VR_VR_VR(vec_nmadd,8) + end interface vec_nmadd + public :: vec_nmadd + ! vec_nmsub VEC_VR_VR_VR_VR(vec_nmsub,4) VEC_VR_VR_VR_VR(vec_nmsub,8) interface vec_nmsub @@ -431,7 +524,24 @@ end interface vec_nmsub public :: vec_nmsub +! 
vec_sel + VEC_VI_VI_VI_VU(vec_sel,1) VEC_VI_VI_VI_VU(vec_sel,2) VEC_VI_VI_VI_VU(vec_sel,4) VEC_VI_VI_VI_VU(vec_sel,8) + VEC_VU_VU_VU_VU(vec_sel,1) VEC_VU_VU_VU_VU(vec_sel,2) VEC_VU_VU_VU_VU(vec_sel,4) VEC_VU_VU_VU_VU(vec_sel,8) + VEC_VR_VR_VR_VU(vec_sel,4) VEC_VR_VR_VR_VU(vec_sel,8) + interface vec_sel + procedure :: VI_VI_VI_VU(vec_sel,1), VI_VI_VI_VU(vec_sel,2), VI_VI_VI_VU(vec_sel,4), VI_VI_VI_VU(vec_sel,8) + procedure :: VU_VU_VU_VU(vec_sel,1), VU_VU_VU_VU(vec_sel,2), VU_VU_VU_VU(vec_sel,4), VU_VU_VU_VU(vec_sel,8) + procedure :: VR_VR_VR_VU(vec_sel,4), VR_VR_VR_VU(vec_sel,8) + end interface vec_sel + public :: vec_sel + +#undef VEC_VI_VI_VI_VU +#undef VEC_VU_VU_VU_VU +#undef VEC_VR_VR_VR_VU #undef VEC_VR_VR_VR_VR +#undef VI_VI_VI_VU +#undef VU_VU_VU_VU +#undef VR_VR_VR_VU #undef VR_VR_VR_VR !---------------------------------- Index: flang/test/Lower/PowerPC/ppc-vec_abs.f90 =================================================================== --- /dev/null +++ flang/test/Lower/PowerPC/ppc-vec_abs.f90 @@ -0,0 +1,131 @@ +! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s +! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s +! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s +! REQUIRES: target=powerpc{{.*}} + +!---------------------- +! vec_abs +!---------------------- + +! CHECK-LABEL: vec_abs_i1 +subroutine vec_abs_i1(arg1) + vector(integer(1)) :: arg1, r + r = vec_abs(arg1) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: %[[zero:.*]] = arith.constant 0 : i8 +! CHECK-FIR: %[[vzero:.*]] = vector.broadcast %[[zero]] : i8 to vector<16xi8> +! CHECK-FIR: %[[sub:.*]] = arith.subi %[[vzero]], %[[varg1]] : vector<16xi8> +! 
CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vmaxsb(%[[sub]], %[[varg1]]) fastmath : (vector<16xi8>, vector<16xi8>) -> !fir.vector<16:i8> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %{{.*}} = llvm.mlir.constant(0 : i8) : i8 +! CHECK-LLVMIR: %[[vzero:.*]] = llvm.mlir.constant(dense<0> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[sub:.*]] = llvm.sub %[[vzero]], %[[arg1]] : vector<16xi8> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vmaxsb(%[[sub]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8> + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[sub:.*]] = sub <16 x i8> zeroinitializer, %[[arg1]] +! CHECK: %{{[0-9]+}} = call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %[[sub]], <16 x i8> %[[arg1]]) +end subroutine vec_abs_i1 + +! CHECK-LABEL: vec_abs_i2 +subroutine vec_abs_i2(arg1) + vector(integer(2)) :: arg1, r + r = vec_abs(arg1) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[zero:.*]] = arith.constant 0 : i16 +! CHECK-FIR: %[[vzero:.*]] = vector.broadcast %[[zero]] : i16 to vector<8xi16> +! CHECK-FIR: %[[sub:.*]] = arith.subi %[[vzero]], %[[varg1]] : vector<8xi16> +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vmaxsh(%[[sub]], %[[varg1]]) fastmath : (vector<8xi16>, vector<8xi16>) -> !fir.vector<8:i16> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %{{.*}} = llvm.mlir.constant(0 : i16) : i16 +! CHECK-LLVMIR: %[[vzero:.*]] = llvm.mlir.constant(dense<0> : vector<8xi16>) : vector<8xi16> +! CHECK-LLVMIR: %[[sub:.*]] = llvm.sub %[[vzero]], %[[arg1]] : vector<8xi16> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vmaxsh(%[[sub]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16> + +! 
CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[sub:.*]] = sub <8 x i16> zeroinitializer, %[[arg1]] +! CHECK: %{{[0-9]+}} = call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %[[sub]], <8 x i16> %[[arg1]]) +end subroutine vec_abs_i2 + +! CHECK-LABEL: vec_abs_i4 +subroutine vec_abs_i4(arg1) + vector(integer(4)) :: arg1, r + r = vec_abs(arg1) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[zero:.*]] = arith.constant 0 : i32 +! CHECK-FIR: %[[vzero:.*]] = vector.broadcast %[[zero]] : i32 to vector<4xi32> +! CHECK-FIR: %[[sub:.*]] = arith.subi %[[vzero]], %[[varg1]] : vector<4xi32> +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vmaxsw(%[[sub]], %[[varg1]]) fastmath : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %{{.*}} = llvm.mlir.constant(0 : i32) : i32 +! CHECK-LLVMIR: %[[vzero:.*]] = llvm.mlir.constant(dense<0> : vector<4xi32>) : vector<4xi32> +! CHECK-LLVMIR: %[[sub:.*]] = llvm.sub %[[vzero]], %[[arg1]] : vector<4xi32> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vmaxsw(%[[sub]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32> + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[sub:.*]] = sub <4 x i32> zeroinitializer, %[[arg1]] +! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %[[sub]], <4 x i32> %[[arg1]]) +end subroutine vec_abs_i4 + +! CHECK-LABEL: vec_abs_i8 +subroutine vec_abs_i8(arg1) + vector(integer(8)) :: arg1, r + r = vec_abs(arg1) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[zero:.*]] = arith.constant 0 : i64 +! 
CHECK-FIR: %[[vzero:.*]] = vector.broadcast %[[zero]] : i64 to vector<2xi64> +! CHECK-FIR: %[[sub:.*]] = arith.subi %[[vzero]], %[[varg1]] : vector<2xi64> +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vmaxsd(%[[sub]], %[[varg1]]) fastmath : (vector<2xi64>, vector<2xi64>) -> !fir.vector<2:i64> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %{{.*}} = llvm.mlir.constant(0 : i64) : i64 +! CHECK-LLVMIR: %[[vzero:.*]] = llvm.mlir.constant(dense<0> : vector<2xi64>) : vector<2xi64> +! CHECK-LLVMIR: %[[sub:.*]] = llvm.sub %[[vzero]], %[[arg1]] : vector<2xi64> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vmaxsd(%[[sub]], %[[arg1]]) {fastmathFlags = #llvm.fastmath} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[sub:.*]] = sub <2 x i64> zeroinitializer, %[[arg1]] +! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64> %[[sub]], <2 x i64> %[[arg1]]) +end subroutine vec_abs_i8 + +! CHECK-LABEL: vec_abs_r4 +subroutine vec_abs_r4(arg1) + vector(real(4)) :: arg1, r + r = vec_abs(arg1) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.fabs.v4f32(%[[arg1]]) fastmath : (!fir.vector<4:f32>) -> !fir.vector<4:f32> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.fabs.v4f32(%[[arg1]]) {fastmathFlags = #llvm.fastmath} : (vector<4xf32>) -> vector<4xf32> + +! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call contract <4 x float> @llvm.fabs.v4f32(<4 x float> %[[arg1]]) +end subroutine vec_abs_r4 + +! CHECK-LABEL: vec_abs_r8 +subroutine vec_abs_r8(arg1) + vector(real(8)) :: arg1, r + r = vec_abs(arg1) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.fabs.v2f64(%[[arg1]]) fastmath : (!fir.vector<2:f64>) -> !fir.vector<2:f64> + +! 
CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.fabs.v2f64(%[[arg1]]) {fastmathFlags = #llvm.fastmath} : (vector<2xf64>) -> vector<2xf64> + +! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %{{[0-9]+}} = call contract <2 x double> @llvm.fabs.v2f64(<2 x double> %[[arg1]]) +end subroutine vec_abs_r8 + Index: flang/test/Lower/PowerPC/ppc-vec_max-min-madd-nmsub.f90 =================================================================== --- flang/test/Lower/PowerPC/ppc-vec_max-min-madd-nmsub.f90 +++ flang/test/Lower/PowerPC/ppc-vec_max-min-madd-nmsub.f90 @@ -502,3 +502,127 @@ ! CHECK: %[[vnmsub:.*]] = call contract <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %[[x]], <2 x double> %[[y]], <2 x double> %[[z]]) ! CHECK: store <2 x double> %[[vnmsub]], ptr %{{[0-9]}}, align 16 end subroutine vec_nmsub_testf64 + +! vec_msub + +! CHECK-LABEL: vec_msub_testf32 +subroutine vec_msub_testf32(x, y, z) + vector(real(4)) :: vmsub, x, y, z + vmsub = vec_msub(x, y, z) +! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref> +! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref> +! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref> +! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[vz:.*]] = fir.convert %[[z]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[nz:.*]] = arith.negf %[[vz]] fastmath : vector<4xf32> +! CHECK-FIR: %[[vmsub:.*]] = fir.call @llvm.fma.v4f32(%[[vx]], %[[vy]], %[[nz]]) fastmath : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> !fir.vector<4:f32> +! CHECK-FIR: fir.store %[[vmsub]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr> +! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr> +! 
CHECK-LLVMIR: %[[nz:.*]] = llvm.fneg %[[z]] {fastmathFlags = #llvm.fastmath} : vector<4xf32> +! CHECK-LLVMIR: %[[vmsub:.*]] = llvm.call @llvm.fma.v4f32(%[[x]], %[[y]], %[[nz]]) {fastmathFlags = #llvm.fastmath} : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[vmsub]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[z:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[nz:.*]] = fneg contract <4 x float> %[[z]] +! CHECK: %[[vmsub:.*]] = call contract <4 x float> @llvm.fma.v4f32(<4 x float> %[[x]], <4 x float> %[[y]], <4 x float> %[[nz]]) +! CHECK: store <4 x float> %[[vmsub]], ptr %{{[0-9]}}, align 16 +end subroutine vec_msub_testf32 + +! CHECK-LABEL: vec_msub_testf64 +subroutine vec_msub_testf64(x, y, z) + vector(real(8)) :: vmsub, x, y, z + vmsub = vec_msub(x, y, z) +! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref> +! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref> +! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref> +! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[vz:.*]] = fir.convert %[[z]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[nz:.*]] = arith.negf %[[vz]] fastmath : vector<2xf64> +! CHECK-FIR: %[[vmsub:.*]] = fir.call @llvm.fma.v2f64(%[[vx]], %[[vy]], %[[nz]]) fastmath : (vector<2xf64>, vector<2xf64>, vector<2xf64>) -> !fir.vector<2:f64> +! CHECK-FIR: fir.store %[[vmsub]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr> +! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr> +! CHECK-LLVMIR: %[[nz:.*]] = llvm.fneg %[[z]] {fastmathFlags = #llvm.fastmath} : vector<2xf64> +! 
CHECK-LLVMIR: %[[vmsub:.*]] = llvm.call @llvm.fma.v2f64(%[[x]], %[[y]], %[[nz]]) {fastmathFlags = #llvm.fastmath} : (vector<2xf64>, vector<2xf64>, vector<2xf64>) -> vector<2xf64> +! CHECK-LLVMIR: llvm.store %[[vmsub]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[z:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[nz:.*]] = fneg contract <2 x double> %[[z]] +! CHECK: %[[vmsub:.*]] = call contract <2 x double> @llvm.fma.v2f64(<2 x double> %[[x]], <2 x double> %[[y]], <2 x double> %[[nz]]) +! CHECK: store <2 x double> %[[vmsub]], ptr %{{[0-9]}}, align 16 +end subroutine vec_msub_testf64 + +! vec_nmadd + +! CHECK-LABEL: vec_nmadd_testf32 +subroutine vec_nmadd_testf32(x, y, z) + vector(real(4)) :: vnmsum, x, y, z + vnmsum = vec_nmadd(x, y, z) +! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref> +! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref> +! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref> +! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[vz:.*]] = fir.convert %[[z]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[msum:.*]] = fir.call @llvm.fma.v4f32(%[[vx]], %[[vy]], %[[vz]]) fastmath : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> !fir.vector<4:f32> +! CHECK-FIR: %[[vmsum:.*]] = fir.convert %[[msum]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[nmsum:.*]] = arith.negf %[[vmsum]] fastmath : vector<4xf32> +! CHECK-FIR: %[[vnmsum:.*]] = fir.convert %[[nmsum]] : (vector<4xf32>) -> !fir.vector<4:f32> +! CHECK-FIR: fir.store %[[vnmsum]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr> +! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr> +! 
CHECK-LLVMIR: %[[msum:.*]] = llvm.call @llvm.fma.v4f32(%[[x]], %[[y]], %[[z]]) {fastmathFlags = #llvm.fastmath} : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32> +! CHECK-LLVMIR: %[[vnmsum:.*]] = llvm.fneg %[[msum]] {fastmathFlags = #llvm.fastmath} : vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[vnmsum]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[z:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[msum:.*]] = call contract <4 x float> @llvm.fma.v4f32(<4 x float> %[[x]], <4 x float> %[[y]], <4 x float> %[[z]]) +! CHECK: %[[vnmsum:.*]] = fneg contract <4 x float> %[[msum]] +! CHECK: store <4 x float> %[[vnmsum]], ptr %{{[0-9]}}, align 16 +end subroutine vec_nmadd_testf32 + +! CHECK-LABEL: vec_nmadd_testf64 +subroutine vec_nmadd_testf64(x, y, z) + vector(real(8)) :: vnmsum, x, y, z + vnmsum = vec_nmadd(x, y, z) +! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref> +! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref> +! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref> +! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[vz:.*]] = fir.convert %[[z]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[msum:.*]] = fir.call @llvm.fma.v2f64(%[[vx]], %[[vy]], %[[vz]]) fastmath : (vector<2xf64>, vector<2xf64>, vector<2xf64>) -> !fir.vector<2:f64> +! CHECK-FIR: %[[vmsum:.*]] = fir.convert %[[msum]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[nmsum:.*]] = arith.negf %[[vmsum]] fastmath : vector<2xf64> +! CHECK-FIR: %[[vnmsum:.*]] = fir.convert %[[nmsum]] : (vector<2xf64>) -> !fir.vector<2:f64> +! CHECK-FIR: fir.store %[[vnmsum]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr> +! 
CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr> +! CHECK-LLVMIR: %[[msum:.*]] = llvm.call @llvm.fma.v2f64(%[[x]], %[[y]], %[[z]]) {fastmathFlags = #llvm.fastmath} : (vector<2xf64>, vector<2xf64>, vector<2xf64>) -> vector<2xf64> +! CHECK-LLVMIR: %[[vnmsum:.*]] = llvm.fneg %[[msum]] {fastmathFlags = #llvm.fastmath} : vector<2xf64> +! CHECK-LLVMIR: llvm.store %[[vnmsum]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[z:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[msum:.*]] = call contract <2 x double> @llvm.fma.v2f64(<2 x double> %[[x]], <2 x double> %[[y]], <2 x double> %[[z]]) +! CHECK: %[[vnmsum:.*]] = fneg contract <2 x double> %[[msum]] +! CHECK: store <2 x double> %[[vnmsum]], ptr %{{[0-9]}}, align 16 +end subroutine vec_nmadd_testf64 Index: flang/test/Lower/PowerPC/ppc-vec_sel.f90 =================================================================== --- /dev/null +++ flang/test/Lower/PowerPC/ppc-vec_sel.f90 @@ -0,0 +1,492 @@ +! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s +! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s +! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s +! REQUIRES: target=powerpc{{.*}} + +!---------------------- +! vec_sel +!---------------------- + +! CHECK-LABEL: vec_sel_testi1 +subroutine vec_sel_testi1(arg1, arg2, arg3) + vector(integer(1)) :: arg1, arg2, r + vector(unsigned(1)) :: arg3 + r = vec_sel(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8> +! 
CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8> +! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<16xi8> +! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<16xi8> +! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<16xi8> to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8> +! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8> +! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8> +! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8> +! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<16xi8> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<16xi8>) -> !fir.vector<16:i8> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[c]] : vector<16xi8> +! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[arg1]], %[[xor]] : vector<16xi8> +! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[arg2]], %[[arg3]] : vector<16xi8> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.or %[[and1]], %[[and2]] : vector<16xi8> + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[comp:.*]] = xor <16 x i8> %[[arg3]], +! CHECK: %[[and1:.*]] = and <16 x i8> %[[arg1]], %[[comp]] +! CHECK: %[[and2:.*]] = and <16 x i8> %[[arg2]], %[[arg3]] +! CHECK: %{{[0-9]+}} = or <16 x i8> %[[and1]], %[[and2]] +end subroutine vec_sel_testi1 + +! 
CHECK-LABEL: vec_sel_testi2 +subroutine vec_sel_testi2(arg1, arg2, arg3) + vector(integer(2)) :: arg1, arg2, r + vector(unsigned(2)) :: arg3 + r = vec_sel(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<8:ui16>) -> vector<8xi16> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8> +! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<16xi8> +! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<16xi8> +! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<8xi16> to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8> +! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8> +! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8> +! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8> +! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<8xi16> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<8xi16>) -> !fir.vector<8:i16> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<16xi8> +! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<16xi8> +! 
CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<8xi16> to vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3]], %[[c]] : vector<16xi8> +! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]] : vector<16xi8> +! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]] : vector<16xi8> +! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]] : vector<16xi8> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<8xi16> + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[bc1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8> +! CHECK: %[[bc2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8> +! CHECK: %[[bc3:.*]] = bitcast <8 x i16> %[[arg3]] to <16 x i8> +! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], +! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]] +! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]] +! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]] +! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <8 x i16> +end subroutine vec_sel_testi2 + +! CHECK-LABEL: vec_sel_testi4 +subroutine vec_sel_testi4(arg1, arg2, arg3) + vector(integer(4)) :: arg1, arg2, r + vector(unsigned(4)) :: arg3 + r = vec_sel(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<4:ui32>) -> vector<4xi32> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8> +! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<4xi32> to vector<16xi8> +! 
CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<4xi32> to vector<16xi8> +! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<4xi32> to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8> +! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8> +! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8> +! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8> +! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<4xi32> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<4xi32>) -> !fir.vector<4:i32> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<4xi32> to vector<16xi8> +! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<4xi32> to vector<16xi8> +! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<4xi32> to vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3:.*]], %[[c]] : vector<16xi8> +! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]] : vector<16xi8> +! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]] : vector<16xi8> +! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]] : vector<16xi8> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<4xi32> + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[bc1:.*]] = bitcast <4 x i32> %5 to <16 x i8> +! CHECK: %[[bc2:.*]] = bitcast <4 x i32> %6 to <16 x i8> +! CHECK: %[[bc3:.*]] = bitcast <4 x i32> %7 to <16 x i8> +! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], +! 
CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]] +! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]] +! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]] +! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <4 x i32> +end subroutine vec_sel_testi4 + +! CHECK-LABEL: vec_sel_testi8 +subroutine vec_sel_testi8(arg1, arg2, arg3) + vector(integer(8)) :: arg1, arg2, r + vector(unsigned(8)) :: arg3 + r = vec_sel(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8> +! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<2xi64> to vector<16xi8> +! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<2xi64> to vector<16xi8> +! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<2xi64> to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8> +! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8> +! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8> +! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8> +! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<2xi64> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<2xi64>) -> !fir.vector<2:i64> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! 
CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<2xi64> to vector<16xi8> +! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<2xi64> to vector<16xi8> +! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<2xi64> to vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3:.*]], %[[c]] : vector<16xi8> +! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]] : vector<16xi8> +! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]] : vector<16xi8> +! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]] : vector<16xi8> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<2xi64> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[bc1:.*]] = bitcast <2 x i64> %5 to <16 x i8> +! CHECK: %[[bc2:.*]] = bitcast <2 x i64> %6 to <16 x i8> +! CHECK: %[[bc3:.*]] = bitcast <2 x i64> %7 to <16 x i8> +! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], +! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]] +! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]] +! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]] +! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <2 x i64> +end subroutine vec_sel_testi8 + +! CHECK-LABEL: vec_sel_testu1 +subroutine vec_sel_testu1(arg1, arg2, arg3) + vector(unsigned(1)) :: arg1, arg2, r + vector(unsigned(1)) :: arg3 + r = vec_sel(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! 
CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8> +! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<16xi8> +! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<16xi8> +! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<16xi8> to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8> +! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8> +! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8> +! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8> +! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<16xi8> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<16xi8>) -> !fir.vector<16:ui8> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[c]] : vector<16xi8> +! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[arg1]], %[[xor]] : vector<16xi8> +! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[arg2]], %[[arg3]] : vector<16xi8> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.or %[[and1]], %[[and2]] : vector<16xi8> + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[comp:.*]] = xor <16 x i8> %[[arg3]], +! CHECK: %[[and1:.*]] = and <16 x i8> %[[arg1]], %[[comp]] +! CHECK: %[[and2:.*]] = and <16 x i8> %[[arg2]], %[[arg3]] +! 
CHECK: %{{[0-9]+}} = or <16 x i8> %[[and1]], %[[and2]] +end subroutine vec_sel_testu1 + +! CHECK-LABEL: vec_sel_testu2 +subroutine vec_sel_testu2(arg1, arg2, arg3) + vector(unsigned(2)) :: arg1, arg2, r + vector(unsigned(2)) :: arg3 + r = vec_sel(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16> +! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16> +! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<8:ui16>) -> vector<8xi16> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8> +! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<16xi8> +! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<16xi8> +! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<8xi16> to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8> +! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8> +! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8> +! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8> +! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<8xi16> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<8xi16>) -> !fir.vector<8:ui16> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<16xi8> +! 
CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<16xi8> +! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<8xi16> to vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3:.*]], %[[c]] : vector<16xi8> +! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]] : vector<16xi8> +! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]] : vector<16xi8> +! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1:.*]], %[[and2]] : vector<16xi8> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<8xi16> + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[bc1:.*]] = bitcast <8 x i16> %5 to <16 x i8> +! CHECK: %[[bc2:.*]] = bitcast <8 x i16> %6 to <16 x i8> +! CHECK: %[[bc3:.*]] = bitcast <8 x i16> %7 to <16 x i8> +! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], +! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]] +! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]] +! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]] +! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <8 x i16> +end subroutine vec_sel_testu2 + +! CHECK-LABEL: vec_sel_testu4 +subroutine vec_sel_testu4(arg1, arg2, arg3) + vector(unsigned(4)) :: arg1, arg2, r + vector(unsigned(4)) :: arg3 + r = vec_sel(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32> +! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32> +! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<4:ui32>) -> vector<4xi32> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8 +! 
CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8> +! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<4xi32> to vector<16xi8> +! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<4xi32> to vector<16xi8> +! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<4xi32> to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8> +! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8> +! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8> +! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8> +! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<4xi32> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<4xi32>) -> !fir.vector<4:ui32> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<4xi32> to vector<16xi8> +! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<4xi32> to vector<16xi8> +! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<4xi32> to vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3:.*]], %[[c]] : vector<16xi8> +! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]] : vector<16xi8> +! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]] : vector<16xi8> +! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]] : vector<16xi8> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<4xi32> + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[bc1:.*]] = bitcast <4 x i32> %5 to <16 x i8> +! 
CHECK: %[[bc2:.*]] = bitcast <4 x i32> %6 to <16 x i8> +! CHECK: %[[bc3:.*]] = bitcast <4 x i32> %7 to <16 x i8> +! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], +! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]] +! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]] +! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]] +! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <4 x i32> +end subroutine vec_sel_testu4 + +! CHECK-LABEL: vec_sel_testu8 +subroutine vec_sel_testu8(arg1, arg2, arg3) + vector(unsigned(8)) :: arg1, arg2, r + vector(unsigned(8)) :: arg3 + r = vec_sel(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8> +! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<2xi64> to vector<16xi8> +! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<2xi64> to vector<16xi8> +! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<2xi64> to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8> +! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8> +! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8> +! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8> +! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<2xi64> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<2xi64>) -> !fir.vector<2:ui64> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! 
CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<2xi64> to vector<16xi8> +! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<2xi64> to vector<16xi8> +! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<2xi64> to vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3:.*]], %[[c]] : vector<16xi8> +! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]] : vector<16xi8> +! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]] : vector<16xi8> +! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]] : vector<16xi8> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<2xi64> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[bc1:.*]] = bitcast <2 x i64> %5 to <16 x i8> +! CHECK: %[[bc2:.*]] = bitcast <2 x i64> %6 to <16 x i8> +! CHECK: %[[bc3:.*]] = bitcast <2 x i64> %7 to <16 x i8> +! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], +! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]] +! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]] +! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]] +! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <2 x i64> +end subroutine vec_sel_testu8 + +! CHECK-LABEL: vec_sel_testr4 +subroutine vec_sel_testr4(arg1, arg2, arg3) + vector(real(4)) :: arg1, arg2, r + vector(unsigned(4)) :: arg3 + r = vec_sel(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32> +! 
CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<4:ui32>) -> vector<4xi32> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8> +! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<4xf32> to vector<16xi8> +! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<4xf32> to vector<16xi8> +! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<4xi32> to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8> +! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8> +! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8> +! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8> +! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<4xf32> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<4xf32>) -> !fir.vector<4:f32> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<16xi8> +! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<4xf32> to vector<16xi8> +! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<4xi32> to vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3:.*]], %[[c]] : vector<16xi8> +! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]] : vector<16xi8> +! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]] : vector<16xi8> +! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]] : vector<16xi8> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<4xf32> + +! 
CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[bc1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8> +! CHECK: %[[bc2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8> +! CHECK: %[[bc3:.*]] = bitcast <4 x i32> %[[arg3]] to <16 x i8> +! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], +! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]] +! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]] +! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]] +! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <4 x float> +end subroutine vec_sel_testr4 + +! CHECK-LABEL: vec_sel_testr8 +subroutine vec_sel_testr8(arg1, arg2, arg3) + vector(real(8)) :: arg1, arg2, r + vector(unsigned(8)) :: arg3 + r = vec_sel(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8> +! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<2xf64> to vector<16xi8> +! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<2xf64> to vector<16xi8> +! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<2xi64> to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8> +! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8> +! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8> +! 
CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8> +! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<2xf64> +! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<2xf64>) -> !fir.vector<2:f64> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<2xf64> to vector<16xi8> +! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<2xf64> to vector<16xi8> +! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<2xi64> to vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3:.*]], %[[c]] : vector<16xi8> +! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]] : vector<16xi8> +! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]] : vector<16xi8> +! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]] : vector<16xi8> +! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<2xf64> + +! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[bc1:.*]] = bitcast <2 x double> %5 to <16 x i8> +! CHECK: %[[bc2:.*]] = bitcast <2 x double> %6 to <16 x i8> +! CHECK: %[[bc3:.*]] = bitcast <2 x i64> %7 to <16 x i8> +! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], +! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]] +! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]] +! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]] +! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <2 x double> +end subroutine vec_sel_testr8