Index: flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h =================================================================== --- flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h +++ flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h @@ -29,6 +29,10 @@ Convert, Ctf, Cvf, + Ld, + Lde, + Ldl, + Lxvp, Mergeh, Mergel, Msub, @@ -49,6 +53,8 @@ Ste, Stxv, Sub, + Xld2, + Xlw4, Xor, Xst, Xst_be, @@ -192,6 +198,10 @@ fir::ExtendedValue genVecPerm(mlir::Type resultType, llvm::ArrayRef args); + template + fir::ExtendedValue genVecLdCallGrp(mlir::Type resultType, + llvm::ArrayRef args); + template fir::ExtendedValue genVecNmaddMsub(mlir::Type resultType, llvm::ArrayRef args); Index: flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp =================================================================== --- flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp +++ flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp @@ -129,6 +129,26 @@ static_cast(&PI::genVecInsert), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, /*isElemental=*/true}, + {"__ppc_vec_ld", + static_cast( + &PI::genVecLdCallGrp), + {{{"arg1", asValue}, {"arg2", asAddr}}}, + /*isElemental=*/false}, + {"__ppc_vec_lde", + static_cast( + &PI::genVecLdCallGrp), + {{{"arg1", asValue}, {"arg2", asAddr}}}, + /*isElemental=*/false}, + {"__ppc_vec_ldl", + static_cast( + &PI::genVecLdCallGrp), + {{{"arg1", asValue}, {"arg2", asAddr}}}, + /*isElemental=*/false}, + {"__ppc_vec_lxvp", + static_cast( + &PI::genVecLdCallGrp), + {{{"arg1", asValue}, {"arg2", asAddr}}}, + /*isElemental=*/false}, {"__ppc_vec_mergeh", static_cast( &PI::genVecMerge), @@ -228,6 +248,16 @@ &PI::genVecAddAndMulSubXor), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, + {"__ppc_vec_xld2_", + static_cast( + &PI::genVecLdCallGrp), + {{{"arg1", asValue}, {"arg2", asAddr}}}, + /*isElemental=*/false}, + {"__ppc_vec_xlw4_", + static_cast( + &PI::genVecLdCallGrp), + {{{"arg1", asValue}, {"arg2", asAddr}}}, + 
/*isElemental=*/false}, {"__ppc_vec_xor", static_cast( &PI::genVecAddAndMulSubXor), @@ -1273,6 +1303,132 @@ return builder.createConvert(loc, resultType, callOp); } +static mlir::Value addOffsetToAddress(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value baseAddr, + mlir::Value offset) { + auto typeExtent{fir::SequenceType::getUnknownExtent()}; + // Construct an !fir.ref> type + auto arrRefTy{builder.getRefType(fir::SequenceType::get( + {typeExtent}, mlir::IntegerType::get(builder.getContext(), 8)))}; + // Convert arg to !fir.ref> + auto resAddr{builder.create(loc, arrRefTy, baseAddr)}; + + return builder.create(loc, arrRefTy, resAddr, offset); +} + +static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value v, + int64_t len) { + assert(v.getType().isa()); + assert(len > 0); + llvm::SmallVector mask; + for (int64_t i = 0; i < len; ++i) { + mask.push_back(len - 1 - i); + } + auto undefVec{builder.create(loc, v.getType())}; + return builder.create(loc, v, undefVec, mask); +} + +// VEC_LD, VEC_LDE, VEC_LDL, VEC_LXVP, VEC_XLD2, VEC_XLW4 +template +fir::ExtendedValue +PPCIntrinsicLibrary::genVecLdCallGrp(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 2); + auto context{builder.getContext()}; + auto arg0{getBase(args[0])}; + auto arg1{getBase(args[1])}; + + // Prepare the return type in FIR. 
+ auto vecResTyInfo{getVecTypeFromFirType(resultType)}; + auto mlirTy{vecResTyInfo.toMlirVectorType(context)}; + auto firTy{vecResTyInfo.toFirVectorType()}; + + // llvm.ppc.altivec.lvx* returns <4xi32> + // Others, like "llvm.ppc.altivec.lvebx" too if arg2 is not of Integer type + const auto i32Ty{mlir::IntegerType::get(builder.getContext(), 32)}; + const auto mVecI32Ty{mlir::VectorType::get(4, i32Ty)}; + + // For vec_ld, need to convert arg0 from i64 to i32 + if (vop == VecOp::Ld && arg0.getType().getIntOrFloatBitWidth() == 64) + arg0 = builder.createConvert(loc, i32Ty, arg0); + + // Add the %val of arg0 to %addr of arg1 + auto addr{addOffsetToAddress(builder, loc, arg1, arg0)}; + llvm::SmallVector parsedArgs{addr}; + + mlir::Type intrinResTy{nullptr}; + llvm::StringRef fname{}; + switch (vop) { + case VecOp::Ld: + fname = "llvm.ppc.altivec.lvx"; + intrinResTy = mVecI32Ty; + break; + case VecOp::Lde: + switch (vecResTyInfo.eleTy.getIntOrFloatBitWidth()) { + case 8: + fname = "llvm.ppc.altivec.lvebx"; + intrinResTy = mlirTy; + break; + case 16: + fname = "llvm.ppc.altivec.lvehx"; + intrinResTy = mlirTy; + break; + case 32: + fname = "llvm.ppc.altivec.lvewx"; + if (mlir::isa(vecResTyInfo.eleTy)) + intrinResTy = mlirTy; + else + intrinResTy = mVecI32Ty; + break; + default: + llvm_unreachable("invalid vector for vec_lde"); + } + break; + case VecOp::Ldl: + fname = "llvm.ppc.altivec.lvxl"; + intrinResTy = mVecI32Ty; + break; + case VecOp::Lxvp: + fname = "llvm.ppc.vsx.lxvp"; + intrinResTy = fir::VectorType::get(256, mlir::IntegerType::get(context, 1)); + break; + case VecOp::Xld2: { + fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvd2x.be" + : "llvm.ppc.vsx.lxvd2x"; + // llvm.ppc.altivec.lxvd2x* returns <2 x double> + intrinResTy = mlir::VectorType::get(2, mlir::FloatType::getF64(context)); + } break; + case VecOp::Xlw4: + fname = isBEVecElemOrderOnLE() ? 
"llvm.ppc.vsx.lxvw4x.be" + : "llvm.ppc.vsx.lxvw4x"; + // llvm.ppc.altivec.lxvw4x* returns <4xi32> + intrinResTy = mVecI32Ty; + break; + default: + llvm_unreachable("invalid vector operation for generator"); + } + + auto funcType{ + mlir::FunctionType::get(context, {addr.getType()}, {intrinResTy})}; + auto funcOp{builder.addNamedFunction(loc, fname, funcType)}; + auto result{ + builder.create(loc, funcOp, parsedArgs).getResult(0)}; + + if (vop == VecOp::Lxvp) + return result; + + if (intrinResTy != mlirTy) + result = builder.create(loc, mlirTy, result); + + if (vop != VecOp::Xld2 && vop != VecOp::Xlw4 && isBEVecElemOrderOnLE()) + return builder.createConvert( + loc, firTy, + reverseVectorElements(builder, loc, result, vecResTyInfo.len)); + + return builder.createConvert(loc, firTy, result); +} + // VEC_NMADD, VEC_MSUB template fir::ExtendedValue @@ -1715,33 +1871,6 @@ } } -static mlir::Value addOffsetToAddress(fir::FirOpBuilder &builder, - mlir::Location loc, mlir::Value baseAddr, - mlir::Value offset) { - auto typeExtent{fir::SequenceType::getUnknownExtent()}; - // Construct an !fir.ref> type - auto arrRefTy{builder.getRefType(fir::SequenceType::get( - {typeExtent}, mlir::IntegerType::get(builder.getContext(), 8)))}; - // Convert arg to !fir.ref> - auto resAddr{builder.create(loc, arrRefTy, baseAddr)}; - - return builder.create(loc, arrRefTy, resAddr, offset); -} - -static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder, - mlir::Location loc, mlir::Value v, - int64_t len) { - assert(v.getType().isa()); - assert(len > 0); - llvm::SmallVector mask; - for (int64_t i = 0; i < len; ++i) { - mask.push_back(len - 1 - i); - } - - auto undefVec{builder.create(loc, v.getType())}; - return builder.create(loc, v, undefVec, mask); -} - // VEC_ST, VEC_STE template void PPCIntrinsicLibrary::genVecStore(llvm::ArrayRef args) { Index: flang/module/__ppc_intrinsics.f90 =================================================================== --- 
flang/module/__ppc_intrinsics.f90 +++ flang/module/__ppc_intrinsics.f90 @@ -150,6 +150,87 @@ !dir$ ignore_tkr(k) arg2; \ end function ; +! vector(i) function f(i, integer) +#define FUNC_VII0I(VKIND) \ + pure vector(integer(VKIND)) function func_vi##VKIND##i0i##VKIND(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + integer(VKIND), intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function ; + +! vector(r) function f(i, real) +#define FUNC_VRI0R(VKIND) \ + pure vector(real(VKIND)) function func_vr##VKIND##i0r##VKIND(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + real(VKIND), intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function ; + +! vector(i) function f(i, vector(i)) +#define FUNC_VII0VI(VKIND) \ + pure vector(integer(VKIND)) function func_vi##VKIND##i0vi##VKIND(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + vector(integer(VKIND)), intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function ; + +! vector(u) function f(i, vector(u)) +#define FUNC_VUI0VU(VKIND) \ + pure vector(unsigned(VKIND)) function func_vu##VKIND##i0vu##VKIND(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + vector(unsigned(VKIND)), intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function ; + +! vector(r) function f(i, vector(r)) +#define FUNC_VRI0VR(VKIND) \ + pure vector(real(VKIND)) function func_vr##VKIND##i0vr##VKIND(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + vector(real(VKIND)), intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function ; + +! __vector_pair function f(i, vector(i)) +#define FUNC_VPI0VI(VKIND) \ + pure __vector_pair function func_vpi0vi##VKIND(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + vector(integer(VKIND)), intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function; + +! 
__vector_pair function f(i, vector(u)) +#define FUNC_VPI0VU(VKIND) \ + pure __vector_pair function func_vpi0vu##VKIND(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + vector(unsigned(VKIND)), intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function; + +! __vector_pair function f(i, vector(r)) +#define FUNC_VPI0VR(VKIND) \ + pure __vector_pair function func_vpi0vr##VKIND(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + vector(real(VKIND)), intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function; + +! __vector_pair function f(i, __vector_pair) +#define FUNC_VPI0VP \ + pure __vector_pair function func_vpi0vp(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + __vector_pair, intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function; + ! The following macros are specific for the vec_convert(v, mold) intrinsics as ! the argument keywords are different from the other vector intrinsics. ! 
@@ -203,10 +284,28 @@ ELEM_FUNC_IVRVR(4,4) ELEM_FUNC_IVRVR(4,8) ELEM_FUNC_VRVII(4) ELEM_FUNC_VRVII(8) ELEM_FUNC_VRVUI(4) ELEM_FUNC_VRVUI(8) + FUNC_VII0VI(1) FUNC_VII0VI(2) FUNC_VII0VI(4) FUNC_VII0VI(8) + FUNC_VUI0VU(1) FUNC_VUI0VU(2) FUNC_VUI0VU(4) FUNC_VUI0VU(8) + FUNC_VRI0VR(4) FUNC_VRI0VR(8) + FUNC_VII0I(1) FUNC_VII0I(2) FUNC_VII0I(4) FUNC_VII0I(8) + FUNC_VRI0R(4) FUNC_VRI0R(8) + FUNC_VPI0VI(1) FUNC_VPI0VI(2) FUNC_VPI0VI(4) FUNC_VPI0VI(8) + FUNC_VPI0VU(1) FUNC_VPI0VU(2) FUNC_VPI0VU(4) FUNC_VPI0VU(8) + FUNC_VPI0VR(4) FUNC_VPI0VR(8) + FUNC_VPI0VP #undef FUNC_VEC_CONVERT_VRVIVR #undef FUNC_VEC_CONVERT_VUVIVU #undef FUNC_VEC_CONVERT_VIVIVI +#undef FUNC_VPI0VP +#undef FUNC_VPI0VR +#undef FUNC_VPI0VU +#undef FUNC_VPI0VI +#undef FUNC_VRI0VR +#undef FUNC_VUI0VU +#undef FUNC_VII0VI +#undef FUNC_VRI0R +#undef FUNC_VII0I #undef ELEM_FUNC_RVRI #undef ELEM_FUNC_VRVUI #undef ELEM_FUNC_IVII @@ -913,6 +1012,154 @@ #undef VU_VI_VI #undef VI_VI_VI +!------------------------------------------------------- +! vector function(integer, i/u/r/vector) +!------------------------------------------------------- +! 
i0 means the integer argument has ignore_tkr(k) +#define VI_I0_VI(NAME, VKIND) __ppc_##NAME##_vi##VKIND##i0##vi##VKIND +#define VU_I0_VU(NAME, VKIND) __ppc_##NAME##_vu##VKIND##i0##vu##VKIND +#define VR_I0_VR(NAME, VKIND) __ppc_##NAME##_vr##VKIND##i0##vr##VKIND +#define VI_I0_I(NAME, VKIND) __ppc_##NAME##_vi##VKIND##i0##i##VKIND +#define VR_I0_R(NAME, VKIND) __ppc_##NAME##_vr##VKIND##i0##r##VKIND + +#define VEC_VI_I0_VI(NAME, VKIND) \ + procedure(func_vi##VKIND##i0##vi##VKIND) :: VI_I0_VI(NAME, VKIND); +#define VEC_VU_I0_VU(NAME, VKIND) \ + procedure(func_vu##VKIND##i0##vu##VKIND) :: VU_I0_VU(NAME, VKIND); +#define VEC_VR_I0_VR(NAME, VKIND) \ + procedure(func_vr##VKIND##i0##vr##VKIND) :: VR_I0_VR(NAME, VKIND); +#define VEC_VI_I0_I(NAME, VKIND) \ + procedure(func_vi##VKIND##i0##i##VKIND) :: VI_I0_I(NAME, VKIND); +#define VEC_VR_I0_R(NAME, VKIND) \ + procedure(func_vr##VKIND##i0##r##VKIND) :: VR_I0_R(NAME, VKIND); + +! vec_ld + VEC_VI_I0_VI(vec_ld,1) VEC_VI_I0_VI(vec_ld,2) VEC_VI_I0_VI(vec_ld,4) + VEC_VU_I0_VU(vec_ld,1) VEC_VU_I0_VU(vec_ld,2) VEC_VU_I0_VU(vec_ld,4) + VEC_VR_I0_VR(vec_ld,4) + VEC_VI_I0_I(vec_ld,1) VEC_VI_I0_I(vec_ld,2) VEC_VI_I0_I(vec_ld,4) + VEC_VR_I0_R(vec_ld,4) + interface vec_ld + procedure :: VI_I0_VI(vec_ld,1), VI_I0_VI(vec_ld,2), VI_I0_VI(vec_ld,4) + procedure :: VU_I0_VU(vec_ld,1), VU_I0_VU(vec_ld,2), VU_I0_VU(vec_ld,4) + procedure :: VR_I0_VR(vec_ld,4) + procedure :: VI_I0_I(vec_ld,1), VI_I0_I(vec_ld,2), VI_I0_I(vec_ld,4) + procedure :: VR_I0_R(vec_ld,4) + end interface + public :: vec_ld + +! vec_lde + VEC_VI_I0_I(vec_lde,1) VEC_VI_I0_I(vec_lde,2) VEC_VI_I0_I(vec_lde,4) + VEC_VR_I0_R(vec_lde,4) + interface vec_lde + procedure :: VI_I0_I(vec_lde,1), VI_I0_I(vec_lde,2), VI_I0_I(vec_lde,4) + procedure :: VR_I0_R(vec_lde,4) + end interface + public :: vec_lde + +! 
vec_ldl + VEC_VI_I0_VI(vec_ldl,1) VEC_VI_I0_VI(vec_ldl,2) VEC_VI_I0_VI(vec_ldl,4) + VEC_VU_I0_VU(vec_ldl,1) VEC_VU_I0_VU(vec_ldl,2) VEC_VU_I0_VU(vec_ldl,4) + VEC_VR_I0_VR(vec_ldl,4) + VEC_VI_I0_I(vec_ldl,1) VEC_VI_I0_I(vec_ldl,2) VEC_VI_I0_I(vec_ldl,4) + VEC_VR_I0_R(vec_ldl,4) + interface vec_ldl + procedure :: VI_I0_VI(vec_ldl,1), VI_I0_VI(vec_ldl,2), VI_I0_VI(vec_ldl,4) + procedure :: VU_I0_VU(vec_ldl,1), VU_I0_VU(vec_ldl,2), VU_I0_VU(vec_ldl,4) + procedure :: VR_I0_VR(vec_ldl,4) + procedure :: VI_I0_I(vec_ldl,1), VI_I0_I(vec_ldl,2), VI_I0_I(vec_ldl,4) + procedure :: VR_I0_R(vec_ldl,4) + end interface + public :: vec_ldl + +! vec_xld2 + VEC_VI_I0_VI(vec_xld2_,1) VEC_VI_I0_VI(vec_xld2_,2) VEC_VI_I0_VI(vec_xld2_,4) VEC_VI_I0_VI(vec_xld2_,8) + VEC_VU_I0_VU(vec_xld2_,1) VEC_VU_I0_VU(vec_xld2_,2) VEC_VU_I0_VU(vec_xld2_,4) VEC_VU_I0_VU(vec_xld2_,8) + VEC_VR_I0_VR(vec_xld2_,4) VEC_VR_I0_VR(vec_xld2_,8) + VEC_VI_I0_I(vec_xld2_,1) VEC_VI_I0_I(vec_xld2_,2) VEC_VI_I0_I(vec_xld2_,4) VEC_VI_I0_I(vec_xld2_,8) + VEC_VR_I0_R(vec_xld2_,4) VEC_VR_I0_R(vec_xld2_,8) + interface vec_xld2 + procedure :: VI_I0_VI(vec_xld2_,1), VI_I0_VI(vec_xld2_,2), VI_I0_VI(vec_xld2_,4), VI_I0_VI(vec_xld2_,8) + procedure :: VU_I0_VU(vec_xld2_,1), VU_I0_VU(vec_xld2_,2), VU_I0_VU(vec_xld2_,4), VU_I0_VU(vec_xld2_,8) + procedure :: VR_I0_VR(vec_xld2_,4), VR_I0_VR(vec_xld2_,8) + procedure :: VI_I0_I(vec_xld2_,1), VI_I0_I(vec_xld2_,2), VI_I0_I(vec_xld2_,4), VI_I0_I(vec_xld2_,8) + procedure :: VR_I0_R(vec_xld2_,4), VR_I0_R(vec_xld2_,8) + end interface + public :: vec_xld2 + +! 
vec_xlw4 + VEC_VI_I0_VI(vec_xlw4_,1) VEC_VI_I0_VI(vec_xlw4_,2) + VEC_VU_I0_VU(vec_xlw4_,1) VEC_VU_I0_VU(vec_xlw4_,2) VEC_VU_I0_VU(vec_xlw4_,4) + VEC_VR_I0_VR(vec_xlw4_,4) + VEC_VI_I0_I(vec_xlw4_,1) VEC_VI_I0_I(vec_xlw4_,2) VEC_VI_I0_I(vec_xlw4_,4) + VEC_VR_I0_R(vec_xlw4_,4) + interface vec_xlw4 + procedure :: VI_I0_VI(vec_xlw4_,1), VI_I0_VI(vec_xlw4_,2) + procedure :: VU_I0_VU(vec_xlw4_,1), VU_I0_VU(vec_xlw4_,2), VU_I0_VU(vec_xlw4_,4) + procedure :: VR_I0_VR(vec_xlw4_,4) + procedure :: VI_I0_I(vec_xlw4_,1), VI_I0_I(vec_xlw4_,2), VI_I0_I(vec_xlw4_,4) + procedure :: VR_I0_R(vec_xlw4_,4) + end interface + public :: vec_xlw4 + +#undef VEC_VR_I0_R +#undef VEC_VI_I0_I +#undef VEC_VR_I0_VR +#undef VEC_VU_I0_VU +#undef VEC_VI_I0_VI +#undef VR_I0_R +#undef VI_I0_I +#undef VR_I0_VR +#undef VU_I0_VU +#undef VI_I0_VI + +!------------------------------------------------------- +! __vector_pair function(integer, vector/__vector_pair) +!------------------------------------------------------- +#define VP_I0_VI(NAME, VKIND) __ppc_##NAME##_vpi0##vi##VKIND +#define VP_I0_VU(NAME, VKIND) __ppc_##NAME##_vpi0##vu##VKIND +#define VP_I0_VR(NAME, VKIND) __ppc_##NAME##_vpi0##vr##VKIND +#define VP_I0_VP(NAME) __ppc_##NAME##_vpi0vp0 + +#define VEC_VP_I0_VI(NAME, VKIND) \ + procedure(func_vpi0vi##VKIND) :: VP_I0_VI(NAME, VKIND); +#define VEC_VP_I0_VU(NAME, VKIND) \ + procedure(func_vpi0vu##VKIND) :: VP_I0_VU(NAME, VKIND); +#define VEC_VP_I0_VR(NAME, VKIND) \ + procedure(func_vpi0vr##VKIND) :: VP_I0_VR(NAME, VKIND); +#define VEC_VP_I0_VP(NAME) procedure(func_vpi0vp) :: VP_I0_VP(NAME); + +! 
vec_lxvp + VEC_VP_I0_VI(vec_lxvp,1) VEC_VP_I0_VI(vec_lxvp,2) VEC_VP_I0_VI(vec_lxvp,4) VEC_VP_I0_VI(vec_lxvp,8) + VEC_VP_I0_VU(vec_lxvp,1) VEC_VP_I0_VU(vec_lxvp,2) VEC_VP_I0_VU(vec_lxvp,4) VEC_VP_I0_VU(vec_lxvp,8) + VEC_VP_I0_VR(vec_lxvp,4) VEC_VP_I0_VR(vec_lxvp,8) + VEC_VP_I0_VP(vec_lxvp) + interface vec_lxvp + procedure :: VP_I0_VI(vec_lxvp,1), VP_I0_VI(vec_lxvp,2), VP_I0_VI(vec_lxvp,4), VP_I0_VI(vec_lxvp,8) + procedure :: VP_I0_VU(vec_lxvp,1), VP_I0_VU(vec_lxvp,2), VP_I0_VU(vec_lxvp,4), VP_I0_VU(vec_lxvp,8) + procedure :: VP_I0_VR(vec_lxvp,4), VP_I0_VR(vec_lxvp,8) + procedure :: VP_I0_VP(vec_lxvp) + end interface vec_lxvp + public :: vec_lxvp + +! vsx_lxvp (alias to vec_lxvp) + interface vsx_lxvp + procedure :: VP_I0_VI(vec_lxvp,1), VP_I0_VI(vec_lxvp,2), VP_I0_VI(vec_lxvp,4), VP_I0_VI(vec_lxvp,8) + procedure :: VP_I0_VU(vec_lxvp,1), VP_I0_VU(vec_lxvp,2), VP_I0_VU(vec_lxvp,4), VP_I0_VU(vec_lxvp,8) + procedure :: VP_I0_VR(vec_lxvp,4), VP_I0_VR(vec_lxvp,8) + procedure :: VP_I0_VP(vec_lxvp) + end interface vsx_lxvp + public :: vsx_lxvp + +#undef VEC_VP_I0_VP +#undef VEC_VP_I0_VR +#undef VEC_VP_I0_VU +#undef VEC_VP_I0_VI +#undef VP_I0_VP +#undef VP_I0_VR +#undef VP_I0_VU +#undef VP_I0_VI + !----------------------------------------- ! vector function(vector, vector, vector) !----------------------------------------- Index: flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 =================================================================== --- /dev/null +++ flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 @@ -0,0 +1,522 @@ +! RUN: bbc -emit-fir %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="FIR" %s +! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="LLVMIR" %s +! REQUIRES: target=powerpc{{.*}} + +!------------------- +! vec_ld +!------------------- + +! 
CHECK-LABEL: @vec_ld_testi8 +subroutine vec_ld_testi8(arg1, arg2, res) + integer(1) :: arg1 + vector(integer(1)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<16xi8> +! FIR: %[[undefv:.*]] = fir.undefined vector<16xi8> +! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<16xi8>) -> !fir.vector<16:i8> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <16 x i8> +! LLVMIR: %[[shflv:.*]] = shufflevector <16 x i8> %[[bc]], <16 x i8> undef, <16 x i32> +! LLVMIR: store <16 x i8> %[[shflv]], ptr %2, align 16 +end subroutine vec_ld_testi8 + +! CHECK-LABEL: @vec_ld_testi16 +subroutine vec_ld_testi16(arg1, arg2, res) + integer(2) :: arg1 + vector(integer(2)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<8xi16> +! FIR: %[[undefv:.*]] = fir.undefined vector<8xi16> +! 
FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <8 x i16> +! LLVMIR: %[[shflv:.*]] = shufflevector <8 x i16> %[[bc]], <8 x i16> undef, <8 x i32> +! LLVMIR: store <8 x i16> %[[shflv]], ptr %2, align 16 +end subroutine vec_ld_testi16 + +! CHECK-LABEL: @vec_ld_testi32 +subroutine vec_ld_testi32(arg1, arg2, res) + integer(4) :: arg1 + vector(integer(4)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32> +! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[shflv:.*]] = shufflevector <4 x i32> %[[ld]], <4 x i32> undef, <4 x i32> +! LLVMIR: store <4 x i32> %[[shflv]], ptr %2, align 16 +end subroutine vec_ld_testi32 + +! CHECK-LABEL: @vec_ld_testf32 +subroutine vec_ld_testf32(arg1, arg2, res) + integer(8) :: arg1 + vector(real(4)) :: arg2, res + res = vec_ld(arg1, arg2) + +! 
FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[i4:.*]] = fir.convert %[[arg1]] : (i64) -> i32 +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[i4]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32> +! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32> +! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[i4:.*]] = trunc i64 %[[arg1]] to i32 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[i4]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float> +! LLVMIR: %[[shflv:.*]] = shufflevector <4 x float> %[[bc]], <4 x float> undef, <4 x i32> +! LLVMIR: store <4 x float> %[[shflv]], ptr %2, align 16 +end subroutine vec_ld_testf32 + +! CHECK-LABEL: @vec_ld_testu32 +subroutine vec_ld_testu32(arg1, arg2, res) + integer(1) :: arg1 + vector(unsigned(4)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32> +! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:ui32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! 
LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[shflv:.*]] = shufflevector <4 x i32> %[[ld]], <4 x i32> undef, <4 x i32> +! LLVMIR: store <4 x i32> %[[shflv]], ptr %2, align 16 +end subroutine vec_ld_testu32 + +! CHECK-LABEL: @vec_ld_testi32a +subroutine vec_ld_testi32a(arg1, arg2, res) + integer(4) :: arg1 + integer(4) :: arg2(10) + vector(integer(4)) :: res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32> +! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[shflv:.*]] = shufflevector <4 x i32> %[[ld]], <4 x i32> undef, <4 x i32> +! LLVMIR: store <4 x i32> %[[shflv]], ptr %2, align 16 +end subroutine vec_ld_testi32a + +! CHECK-LABEL: @vec_ld_testf32av +subroutine vec_ld_testf32av(arg1, arg2, res) + integer(8) :: arg1 + vector(real(4)) :: arg2(2, 4, 8) + vector(real(4)) :: res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[i4:.*]] = fir.convert %[[arg1]] : (i64) -> i32 +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[i4]] : (!fir.ref>, i32) -> !fir.ref> +! 
FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32> +! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32> +! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[i4:.*]] = trunc i64 %[[arg1]] to i32 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[i4]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float> +! LLVMIR: %[[shflv:.*]] = shufflevector <4 x float> %[[bc]], <4 x float> undef, <4 x i32> +! LLVMIR: store <4 x float> %[[shflv]], ptr %2, align 16 +end subroutine vec_ld_testf32av + +! CHECK-LABEL: @vec_ld_testi32s +subroutine vec_ld_testi32s(arg1, arg2, res) + integer(4) :: arg1 + real(4) :: arg2 + vector(real(4)) :: res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32> +! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32> +! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! 
LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float> +! LLVMIR: %[[shflv:.*]] = shufflevector <4 x float> %[[bc]], <4 x float> undef, <4 x i32> +! LLVMIR: store <4 x float> %[[shflv]], ptr %2, align 16 +end subroutine vec_ld_testi32s + +!------------------- +! vec_lde +!------------------- + +! CHECK-LABEL: @vec_lde_testi8s +subroutine vec_lde_testi8s(arg1, arg2, res) + integer(1) :: arg1 + integer(1) :: arg2 + vector(integer(1)) :: res + res = vec_lde(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvebx(%[[addr]]) fastmath : (!fir.ref>) -> vector<16xi8> +! FIR: %[[undefv:.*]] = fir.undefined vector<16xi8> +! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<16xi8>) -> !fir.vector<16:i8> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvebx(ptr %[[addr]]) +! LLVMIR: %[[shflv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> +! LLVMIR: store <16 x i8> %[[shflv]], ptr %2, align 16 +end subroutine vec_lde_testi8s + +! CHECK-LABEL: @vec_lde_testi16a +subroutine vec_lde_testi16a(arg1, arg2, res) + integer(2) :: arg1 + integer(2) :: arg2(2, 11, 7) + vector(integer(2)) :: res + res = vec_lde(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! 
FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvehx(%[[addr]]) fastmath : (!fir.ref>) -> vector<8xi16> +! FIR: %[[undefv:.*]] = fir.undefined vector<8xi16> +! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <8 x i16> @llvm.ppc.altivec.lvehx(ptr %[[addr]]) +! LLVMIR: %[[shflv:.*]] = shufflevector <8 x i16> %[[ld]], <8 x i16> undef, <8 x i32> +! LLVMIR: store <8 x i16> %[[shflv]], ptr %2, align 16 +end subroutine vec_lde_testi16a + +! CHECK-LABEL: @vec_lde_testi32a +subroutine vec_lde_testi32a(arg1, arg2, res) + integer(4) :: arg1 + integer(4) :: arg2(5) + vector(integer(4)) :: res + res = vec_lde(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvewx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32> +! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvewx(ptr %[[addr]]) +! LLVMIR: %[[shflv:.*]] = shufflevector <4 x i32> %[[ld]], <4 x i32> undef, <4 x i32> +! LLVMIR: store <4 x i32> %[[shflv]], ptr %2, align 16 +end subroutine vec_lde_testi32a + +! 
CHECK-LABEL: @vec_lde_testf32a +subroutine vec_lde_testf32a(arg1, arg2, res) + integer(8) :: arg1 + real(4) :: arg2(11) + vector(real(4)) :: res + res = vec_lde(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvewx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32> +! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32> +! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvewx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float> +! LLVMIR: %[[shflv:.*]] = shufflevector <4 x float> %[[bc]], <4 x float> undef, <4 x i32> +! LLVMIR: store <4 x float> %[[shflv]], ptr %2, align 16 +end subroutine vec_lde_testf32a + +!------------------- +! vec_xld2 +!------------------- + +! CHECK-LABEL: @vec_xld2_testi8a +subroutine vec_xld2_testi8a(arg1, arg2, res) + integer(1) :: arg1 + vector(integer(1)) :: arg2(4) + vector(integer(1)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<16xi8> +! 
FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <16 x i8> +! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testi8a + +! CHECK-LABEL: @vec_xld2_testi16a +subroutine vec_xld2_testi16a(arg1, arg2, res) + integer(2) :: arg1 + vector(integer(2)) :: arg2(4) + vector(integer(2)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<8xi16> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <8 x i16> +! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testi16a + +! CHECK-LABEL: @vec_xld2_testi32a +subroutine vec_xld2_testi32a(arg1, arg2, res) + integer(4) :: arg1 + vector(integer(4)) :: arg2(11) + vector(integer(4)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! 
FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<4xi32> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <4 x i32> +! LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testi32a + +! CHECK-LABEL: @vec_xld2_testi64a +subroutine vec_xld2_testi64a(arg1, arg2, res) + integer(8) :: arg1 + vector(integer(8)) :: arg2(31,7) + vector(integer(8)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<2xi64> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xi64>) -> !fir.vector<2:i64> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <2 x i64> +! LLVMIR: store <2 x i64> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testi64a + +! CHECK-LABEL: @vec_xld2_testf32a +subroutine vec_xld2_testf32a(arg1, arg2, res) + integer(2) :: arg1 + vector(real(4)) :: arg2(5) + vector(real(4)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! 
FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<4xf32> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testf32a + +! CHECK-LABEL: @vec_xld2_testf64a +subroutine vec_xld2_testf64a(arg1, arg2, res) + integer(8) :: arg1 + vector(real(8)) :: arg2(4) + vector(real(8)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]]) +! LLVMIR: store <2 x double> %[[ld]], ptr %2, align 16 +end subroutine vec_xld2_testf64a + +!------------------- +! vec_xlw4 +!------------------- + +! 
CHECK-LABEL: @vec_xlw4_testi8a +subroutine vec_xlw4_testi8a(arg1, arg2, res) + integer(1) :: arg1 + vector(integer(1)) :: arg2(2, 11, 37) + vector(integer(1)) :: res + res = vec_xlw4(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<16xi8> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <16 x i8> +! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16 +end subroutine vec_xlw4_testi8a + +! CHECK-LABEL: @vec_xlw4_testi16a +subroutine vec_xlw4_testi16a(arg1, arg2, res) + integer(2) :: arg1 + vector(integer(2)) :: arg2(2, 8) + vector(integer(2)) :: res + res = vec_xlw4(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<8xi16> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]]) +! 
LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <8 x i16> +! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16 +end subroutine vec_xlw4_testi16a + +! CHECK-LABEL: @vec_xlw4_testu32a +subroutine vec_xlw4_testu32a(arg1, arg2, res) + integer(4) :: arg1 + vector(unsigned(4)) :: arg2(8, 4) + vector(unsigned(4)) :: res + res = vec_xlw4(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:ui32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[ld]], ptr %2, align 16 +end subroutine vec_xlw4_testu32a + +! CHECK-LABEL: @vec_xlw4_testf32a +subroutine vec_xlw4_testf32a(arg1, arg2, res) + integer(2) :: arg1 + vector(real(4)) :: arg2 + vector(real(4)) :: res + res = vec_xlw4(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]]) +! 
LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_xlw4_testf32a Index: flang/test/Lower/PowerPC/ppc-vec-load-pwr10.f90 =================================================================== --- /dev/null +++ flang/test/Lower/PowerPC/ppc-vec-load-pwr10.f90 @@ -0,0 +1,370 @@ +! RUN: %flang_fc1 -target-cpu pwr10 -emit-fir %s -o - | FileCheck --check-prefixes="FIR" %s +! RUN: %flang_fc1 -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR-P10" %s +! REQUIRES: target=powerpc{{.*}} + +!---------------------- +! vec_lxvp +!---------------------- + +! CHECK-LABEL: @vec_lxvp_test_i2_ +subroutine vec_lxvp_test_i2(v1, offset, vp) + implicit none + integer(2) :: offset + vector(integer(2)) :: v1 + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_i2 + +! CHECK-LABEL: @vec_lxvp_test_i4_ +subroutine vec_lxvp_test_i4(v1, offset, vp) + implicit none + integer(2) :: offset + vector(integer(4)) :: v1 + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! 
FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_i4 + +! CHECK-LABEL: @vec_lxvp_test_u2_ +subroutine vec_lxvp_test_u2(v1, offset, vp) + implicit none + integer(2) :: offset + vector(unsigned(2)) :: v1 + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_u2 + +! CHECK-LABEL: @vec_lxvp_test_u4_ +subroutine vec_lxvp_test_u4(v1, offset, vp) + implicit none + integer(2) :: offset + vector(unsigned(4)) :: v1 + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! 
LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_u4 + +! CHECK-LABEL: @vec_lxvp_test_r4_ +subroutine vec_lxvp_test_r4(v1, offset, vp) + implicit none + integer(2) :: offset + vector(real(4)) :: v1 + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_r4 + +! CHECK-LABEL: @vec_lxvp_test_r8_ +subroutine vec_lxvp_test_r8(v1, offset, vp) + implicit none + integer(2) :: offset + vector(real(8)) :: v1 + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_r8 + +! 
CHECK-LABEL: @vec_lxvp_test_vp_ +subroutine vec_lxvp_test_vp(v1, offset, vp) + implicit none + integer(2) :: offset + __vector_pair :: v1 + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_vp + +! CHECK-LABEL: @vec_lxvp_test_i2_arr_ +subroutine vec_lxvp_test_i2_arr(v1, offset, vp) + implicit none + integer :: offset + vector(integer(2)) :: v1(10) + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i32, ptr %1, align 4 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_i2_arr + +! CHECK-LABEL: @vec_lxvp_test_i4_arr_ +subroutine vec_lxvp_test_i4_arr(v1, offset, vp) + implicit none + integer :: offset + vector(integer(4)) :: v1(10) + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! 
FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i32, ptr %1, align 4 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_i4_arr + +! CHECK-LABEL: @vec_lxvp_test_u2_arr_ +subroutine vec_lxvp_test_u2_arr(v1, offset, vp) + implicit none + integer :: offset + vector(unsigned(2)) :: v1(10) + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i32, ptr %1, align 4 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_u2_arr + +! CHECK-LABEL: @vec_lxvp_test_u4_arr_ +subroutine vec_lxvp_test_u4_arr(v1, offset, vp) + implicit none + integer :: offset + vector(unsigned(4)) :: v1(10) + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i32) -> !fir.ref> +! 
FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i32, ptr %1, align 4 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_u4_arr + +! CHECK-LABEL: @vec_lxvp_test_r4_arr_ +subroutine vec_lxvp_test_r4_arr(v1, offset, vp) + implicit none + integer :: offset + vector(real(4)) :: v1(10) + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i32, ptr %1, align 4 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_r4_arr + +! CHECK-LABEL: @vec_lxvp_test_r8_arr_ +subroutine vec_lxvp_test_r8_arr(v1, offset, vp) + implicit none + integer :: offset + vector(real(8)) :: v1(10) + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i32, ptr %1, align 4 +! 
LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_r8_arr + +! CHECK-LABEL: @vec_lxvp_test_vp_arr_ +subroutine vec_lxvp_test_vp_arr(v1, offset, vp) + implicit none + integer(8) :: offset + __vector_pair :: v1(10) + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i64, ptr %1, align 8 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i64 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_vp_arr + +!---------------------- +! vsx_lxvp +!---------------------- +! CHECK-LABEL: @vsx_lxvp_test_i4_ +subroutine vsx_lxvp_test_i4(v1, offset, vp) + implicit none + integer(2) :: offset + vector(integer(4)) :: v1 + __vector_pair :: vp + vp = vsx_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! 
LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vsx_lxvp_test_i4 + +! CHECK-LABEL: @vsx_lxvp_test_r8_ +subroutine vsx_lxvp_test_r8(v1, offset, vp) + implicit none + integer(2) :: offset + vector(real(8)) :: v1 + __vector_pair :: vp + vp = vsx_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vsx_lxvp_test_r8 + +! CHECK-LABEL: @vsx_lxvp_test_i2_arr_ +subroutine vsx_lxvp_test_i2_arr(v1, offset, vp) + implicit none + integer :: offset + vector(integer(2)) :: v1(10) + __vector_pair :: vp + vp = vsx_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i32, ptr %1, align 4 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vsx_lxvp_test_i2_arr + +! 
CHECK-LABEL: @vsx_lxvp_test_vp_arr_ +subroutine vsx_lxvp_test_vp_arr(v1, offset, vp) + implicit none + integer(8) :: offset + __vector_pair :: v1(10) + __vector_pair :: vp + vp = vsx_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i64, ptr %1, align 8 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i64 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vsx_lxvp_test_vp_arr Index: flang/test/Lower/PowerPC/ppc-vec-load.f90 =================================================================== --- /dev/null +++ flang/test/Lower/PowerPC/ppc-vec-load.f90 @@ -0,0 +1,655 @@ +! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="FIR" %s +! RUN: %flang -emit-llvm -S %s -o - | FileCheck --check-prefixes="LLVMIR" %s +! REQUIRES: target=powerpc{{.*}} + +!---------------------- +! vec_ld +!---------------------- + +! CHECK-LABEL: @vec_ld_testi8 +subroutine vec_ld_testi8(arg1, arg2, res) + integer(1) :: arg1 + vector(integer(1)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<16xi8> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8> +! 
FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %{{.*}}, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <16 x i8> +! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16 +end subroutine vec_ld_testi8 + +! CHECK-LABEL: @vec_ld_testi16 +subroutine vec_ld_testi16(arg1, arg2, res) + integer(2) :: arg1 + vector(integer(2)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<8xi16> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <8 x i16> +! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16 +end subroutine vec_ld_testi16 + +! CHECK-LABEL: @vec_ld_testi32 +subroutine vec_ld_testi32(arg1, arg2, res) + integer(4) :: arg1 + vector(integer(4)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! 
FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[bc:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16 +end subroutine vec_ld_testi32 + +! CHECK-LABEL: @vec_ld_testf32 +subroutine vec_ld_testf32(arg1, arg2, res) + integer(8) :: arg1 + vector(real(4)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg1i32:.*]] = fir.convert %[[arg1]] : (i64) -> i32 +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1i32]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[arg1i32:.*]] = trunc i64 %[[arg1]] to i32 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1i32]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_ld_testf32 + +! CHECK-LABEL: @vec_ld_testu32 +subroutine vec_ld_testu32(arg1, arg2, res) + integer(1) :: arg1 + vector(unsigned(4)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! 
FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:ui32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 +end subroutine vec_ld_testu32 + +! CHECK-LABEL: @vec_ld_testi32a +subroutine vec_ld_testi32a(arg1, arg2, res) + integer(4) :: arg1 + integer(4) :: arg2(10) + vector(integer(4)) :: res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 +end subroutine vec_ld_testi32a + +! CHECK-LABEL: @vec_ld_testf32av +subroutine vec_ld_testf32av(arg1, arg2, res) + integer(8) :: arg1 + vector(real(4)) :: arg2(2, 4, 8) + vector(real(4)) :: res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg1i32:.*]] = fir.convert %[[arg1]] : (i64) -> i32 +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1i32]] : (!fir.ref>, i32) -> !fir.ref> +! 
FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[arg1i32:.*]] = trunc i64 %[[arg1]] to i32 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1i32]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_ld_testf32av + +! CHECK-LABEL: @vec_ld_testi32s +subroutine vec_ld_testi32s(arg1, arg2, res) + integer(4) :: arg1 + real(4) :: arg2 + vector(real(4)) :: res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_ld_testi32s + +!---------------------- +! vec_lde +!---------------------- + +! 
CHECK-LABEL: @vec_lde_testi8s +subroutine vec_lde_testi8s(arg1, arg2, res) + integer(1) :: arg1 + integer(1) :: arg2 + vector(integer(1)) :: res + res = vec_lde(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvebx(%[[addr]]) fastmath : (!fir.ref>) -> vector<16xi8> +! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<16xi8>) -> !fir.vector<16:i8> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <16 x i8> @llvm.ppc.altivec.lvebx(ptr %[[addr]]) +! LLVMIR: store <16 x i8> %[[call]], ptr %2, align 16 +end subroutine vec_lde_testi8s + +! CHECK-LABEL: @vec_lde_testi16a +subroutine vec_lde_testi16a(arg1, arg2, res) + integer(2) :: arg1 + integer(2) :: arg2(2, 4, 8) + vector(integer(2)) :: res + res = vec_lde(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvehx(%[[addr]]) fastmath : (!fir.ref>) -> vector<8xi16> +! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <8 x i16> @llvm.ppc.altivec.lvehx(ptr %[[addr]]) +! LLVMIR: store <8 x i16> %[[call]], ptr %2, align 16 +end subroutine vec_lde_testi16a + +! 
CHECK-LABEL: @vec_lde_testi32a +subroutine vec_lde_testi32a(arg1, arg2, res) + integer(4) :: arg1 + integer(4) :: arg2(4) + vector(integer(4)) :: res + res = vec_lde(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvewx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvewx(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 +end subroutine vec_lde_testi32a + +! CHECK-LABEL: @vec_lde_testf32a +subroutine vec_lde_testf32a(arg1, arg2, res) + integer(8) :: arg1 + real(4) :: arg2(4) + vector(real(4)) :: res + res = vec_lde(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvewx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvewx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float> +! 
LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_lde_testf32a + +!---------------------- +! vec_ldl +!---------------------- + +! CHECK-LABEL: @vec_ldl_testi8 +subroutine vec_ldl_testi8(arg1, arg2, res) + integer(1) :: arg1 + vector(integer(1)) :: arg2, res + res = vec_ldl(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<16xi8> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %{{.*}}, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <16 x i8> +! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16 +end subroutine vec_ldl_testi8 + +! CHECK-LABEL: @vec_ldl_testi16 +subroutine vec_ldl_testi16(arg1, arg2, res) + integer(2) :: arg1 + vector(integer(2)) :: arg2, res + res = vec_ldl(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<8xi16> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! 
LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <8 x i16> +! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16 +end subroutine vec_ldl_testi16 + +! CHECK-LABEL: @vec_ldl_testi32 +subroutine vec_ldl_testi32(arg1, arg2, res) + integer(4) :: arg1 + vector(integer(4)) :: arg2, res + res = vec_ldl(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[bc:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16 +end subroutine vec_ldl_testi32 + +! CHECK-LABEL: @vec_ldl_testf32 +subroutine vec_ldl_testf32(arg1, arg2, res) + integer(8) :: arg1 + vector(real(4)) :: arg2, res + res = vec_ldl(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! 
LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_ldl_testf32 + +! CHECK-LABEL: @vec_ldl_testu32 +subroutine vec_ldl_testu32(arg1, arg2, res) + integer(1) :: arg1 + vector(unsigned(4)) :: arg2, res + res = vec_ldl(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:ui32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 +end subroutine vec_ldl_testu32 + +! CHECK-LABEL: @vec_ldl_testi32a +subroutine vec_ldl_testi32a(arg1, arg2, res) + integer(4) :: arg1 + integer(4) :: arg2(10) + vector(integer(4)) :: res + res = vec_ldl(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! 
LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 +end subroutine vec_ldl_testi32a + +! CHECK-LABEL: @vec_ldl_testf32av +subroutine vec_ldl_testf32av(arg1, arg2, res) + integer(8) :: arg1 + vector(real(4)) :: arg2(2, 4, 8) + vector(real(4)) :: res + res = vec_ldl(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_ldl_testf32av + +! CHECK-LABEL: @vec_ldl_testi32s +subroutine vec_ldl_testi32s(arg1, arg2, res) + integer(4) :: arg1 + real(4) :: arg2 + vector(real(4)) :: res + res = vec_ldl(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! 
LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_ldl_testi32s + +!---------------------- +! vec_xld2 +!---------------------- + +! CHECK-LABEL: @vec_xld2_testi8a +subroutine vec_xld2_testi8a(arg1, arg2, res) + integer(1) :: arg1 + vector(integer(1)) :: arg2(4) + vector(integer(1)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<16xi8> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <16 x i8> +! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testi8a + +! CHECK-LABEL: @vec_xld2_testi16 +subroutine vec_xld2_testi16(arg1, arg2, res) + integer :: arg1 + vector(integer(2)) :: arg2 + vector(integer(2)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! 
FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<8xi16> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <8 x i16> +! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testi16 + +! CHECK-LABEL: @vec_xld2_testi32a +subroutine vec_xld2_testi32a(arg1, arg2, res) + integer(4) :: arg1 + vector(integer(4)) :: arg2(41) + vector(integer(4)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<4xi32> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <4 x i32> +! LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testi32a + +! CHECK-LABEL: @vec_xld2_testi64a +subroutine vec_xld2_testi64a(arg1, arg2, res) + integer(8) :: arg1 + vector(integer(8)) :: arg2(4) + vector(integer(8)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! 
FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<2xi64> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xi64>) -> !fir.vector<2:i64> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <2 x i64> +! LLVMIR: store <2 x i64> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testi64a + +! CHECK-LABEL: @vec_xld2_testf32a +subroutine vec_xld2_testf32a(arg1, arg2, res) + integer(2) :: arg1 + vector(real(4)) :: arg2(4) + vector(real(4)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<4xf32> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testf32a + +! 
CHECK-LABEL: @vec_xld2_testf64a +subroutine vec_xld2_testf64a(arg1, arg2, res) + integer(8) :: arg1 + vector(real(8)) :: arg2(4) + vector(real(8)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]]) +! LLVMIR: store <2 x double> %[[ld]], ptr %2, align 16 +end subroutine vec_xld2_testf64a + +!---------------------- +! vec_xlw4 +!---------------------- + +! CHECK-LABEL: @vec_xlw4_testi8a +subroutine vec_xlw4_testi8a(arg1, arg2, res) + integer(1) :: arg1 + vector(integer(1)) :: arg2(2, 4, 8) + vector(integer(1)) :: res + res = vec_xlw4(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<16xi8> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]]) +! LLVMIR: %[[res:.*]] = bitcast <4 x i32> %[[ld]] to <16 x i8> +! 
LLVMIR: store <16 x i8> %[[res]], ptr %2, align 16 +end subroutine vec_xlw4_testi8a + +! CHECK-LABEL: @vec_xlw4_testi16a +subroutine vec_xlw4_testi16a(arg1, arg2, res) + integer(2) :: arg1 + vector(integer(2)) :: arg2(2, 4, 8) + vector(integer(2)) :: res + res = vec_xlw4(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<8xi16> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]]) +! LLVMIR: %[[res:.*]] = bitcast <4 x i32> %[[ld]] to <8 x i16> +! LLVMIR: store <8 x i16> %[[res]], ptr %2, align 16 +end subroutine vec_xlw4_testi16a + +! CHECK-LABEL: @vec_xlw4_testu32a +subroutine vec_xlw4_testu32a(arg1, arg2, res) + integer(4) :: arg1 + vector(unsigned(4)) :: arg2(2, 4, 8) + vector(unsigned(4)) :: res + res = vec_xlw4(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:ui32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! 
LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[ld]], ptr %2, align 16 +end subroutine vec_xlw4_testu32a + +! CHECK-LABEL: @vec_xlw4_testf32a +subroutine vec_xlw4_testf32a(arg1, arg2, res) + integer(2) :: arg1 + vector(real(4)) :: arg2(4) + vector(real(4)) :: res + res = vec_xlw4(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]]) +! LLVMIR: %[[res:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float> +! LLVMIR: store <4 x float> %[[res]], ptr %2, align 16 +end subroutine vec_xlw4_testf32a