Index: flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h =================================================================== --- flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h +++ flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h @@ -41,8 +41,15 @@ Sr, Srl, Sro, + St, + Ste, + Stxv, Sub, - Xor + Xor, + Xst, + Xst_be, + Xstd2, + Xstw4 }; /// Enums used to templatize and share lowering of PowerPC MMA intrinsics. @@ -172,6 +179,12 @@ fir::ExtendedValue genVecSel(mlir::Type resultType, llvm::ArrayRef args); + + template + void genVecStore(llvm::ArrayRef); + + template + void genVecXStore(llvm::ArrayRef); }; const IntrinsicHandler *findPPCIntrinsicHandler(llvm::StringRef name); Index: flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp =================================================================== --- flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp +++ flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp @@ -180,6 +180,21 @@ &PI::genVecShift), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, + {"__ppc_vec_st", + static_cast( + &PI::genVecStore), + {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, + /*isElemental=*/false}, + {"__ppc_vec_ste", + static_cast( + &PI::genVecStore), + {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, + /*isElemental=*/false}, + {"__ppc_vec_stxv", + static_cast( + &PI::genVecXStore), + {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, + /*isElemental=*/false}, {"__ppc_vec_sub", static_cast( &PI::genVecAddAndMulSubXor), @@ -190,6 +205,26 @@ &PI::genVecAddAndMulSubXor), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, + {"__ppc_vec_xst", + static_cast( + &PI::genVecXStore), + {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, + /*isElemental=*/false}, + {"__ppc_vec_xst_be", + static_cast( + &PI::genVecXStore), + {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, + /*isElemental=*/false}, + {"__ppc_vec_xstd2_", + static_cast( + 
&PI::genVecXStore), + {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, + /*isElemental=*/false}, + {"__ppc_vec_xstw4_", + static_cast( + &PI::genVecXStore), + {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, + /*isElemental=*/false}, }; static constexpr MathOperation ppcMathOperations[] = { @@ -1371,4 +1406,184 @@ } } +static mlir::Value addOffsetToAddress(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value baseAddr, + mlir::Value offset) { + // Need to convert arg1 and the result of CoordinateOp to !fir.ref + + auto typeExtent{fir::SequenceType::getUnknownExtent()}; + // Construct an !fir.ref> type + auto arrRefTy{builder.getRefType(fir::SequenceType::get( + {typeExtent}, mlir::IntegerType::get(builder.getContext(), 8)))}; + auto resAddr{builder.create(loc, arrRefTy, baseAddr)}; + + return builder.create(loc, arrRefTy, resAddr, offset); +} + +static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value v, + int64_t len) { + assert(v.getType().isa()); + assert(len > 0); + llvm::SmallVector mask; + for (int64_t i = 0; i < len; ++i) { + mask.push_back(len - 1 - i); + } + auto undefVec{builder.create(loc, v.getType())}; + return builder.create(loc, v, undefVec, mask); +} + +// VEC_ST, VEC_STE +template +void PPCIntrinsicLibrary::genVecStore(llvm::ArrayRef args) { + assert(args.size() == 3); + + auto context{builder.getContext()}; + auto argBases{getBasesForArgs(args)}; + auto arg1TyInfo{getVecTypeFromFir(argBases[0])}; + + auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])}; + + llvm::StringRef fname{}; + mlir::VectorType stTy{nullptr}; + auto i32ty{mlir::IntegerType::get(context, 32)}; + switch (vop) { + case VecOp::St: + stTy = mlir::VectorType::get(4, i32ty); + fname = "llvm.ppc.altivec.stvx"; + break; + case VecOp::Ste: { + const auto width{arg1TyInfo.eleTy.getIntOrFloatBitWidth()}; + const auto len{arg1TyInfo.len}; + + if (arg1TyInfo.isFloat32()) { + stTy = 
mlir::VectorType::get(len, i32ty); + fname = "llvm.ppc.altivec.stvewx"; + } else if (arg1TyInfo.eleTy.isa()) { + stTy = mlir::VectorType::get(len, mlir::IntegerType::get(context, width)); + + switch (width) { + case 8: + fname = "llvm.ppc.altivec.stvebx"; + break; + case 16: + fname = "llvm.ppc.altivec.stvehx"; + break; + case 32: + fname = "llvm.ppc.altivec.stvewx"; + break; + default: + llvm_unreachable("invalid element size"); + } + } else + llvm_unreachable("unknown type"); + break; + } + default: + llvm_unreachable("invalid vector operation for generator"); + } + + auto funcType{ + mlir::FunctionType::get(context, {stTy, addr.getType()}, std::nullopt)}; + mlir::func::FuncOp funcOp = builder.addNamedFunction(loc, fname, funcType); + + llvm::SmallVector biArgs; + + mlir::Value newArg1; + auto vecTyInfo{getVecTypeFromFirType(argBases[0].getType())}; + auto cnv{builder.createConvert(loc, vecTyInfo.toMlirVectorType(context), + argBases[0])}; + + if (stTy != arg1TyInfo.toMlirVectorType(context)) + newArg1 = builder.create(loc, stTy, cnv); + else + newArg1 = cnv; + + // TODO: handle element order + + biArgs.push_back(newArg1); + biArgs.push_back(addr); + + builder.create(loc, funcOp, biArgs); +} + +static mlir::NamedAttribute getAlignmentAttr(fir::FirOpBuilder &builder, + const int val) { + auto i64ty{mlir::IntegerType::get(builder.getContext(), 64)}; + auto alignAttr{mlir::IntegerAttr::get(i64ty, val)}; + return builder.getNamedAttr("alignment", alignAttr); +} + +// VEC_XST, VEC_XST_BE, VEC_STXV, VEC_XSTD2, VEC_XSTW4 +template +void PPCIntrinsicLibrary::genVecXStore( + llvm::ArrayRef args) { + assert(args.size() == 3); + auto context{builder.getContext()}; + auto argBases{getBasesForArgs(args)}; + VecTypeInfo arg1TyInfo{getVecTypeFromFir(argBases[0])}; + + auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])}; + + mlir::Value trg{nullptr}; + mlir::Value src{nullptr}; + + switch (vop) { + case VecOp::Xst: + case VecOp::Xst_be: { + src = argBases[0]; + trg = 
builder.createConvert(loc, builder.getRefType(argBases[0].getType()), + addr); + // TODO: handle element order + if (vop == VecOp::Xst_be) { + auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context), + argBases[0])}; + auto shf{reverseVectorElements(builder, loc, cnv, arg1TyInfo.len)}; + + src = builder.createConvert(loc, arg1TyInfo.toFirVectorType(), shf); + } + break; + } + case VecOp::Xstd2: + case VecOp::Xstw4: { + // a 16-byte vector arg1 is treated as two 8-byte elements or + // four 4-byte elements + mlir::IntegerType elemTy; + uint64_t numElem = (vop == VecOp::Xstd2) ? 2 : 4; + elemTy = builder.getIntegerType(128 / numElem); + + mlir::VectorType mlirVecTy{mlir::VectorType::get(numElem, elemTy)}; + fir::VectorType firVecTy{fir::VectorType::get(numElem, elemTy)}; + + auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context), + argBases[0])}; + + mlir::Type srcTy{nullptr}; + if (numElem != arg1TyInfo.len) { + cnv = builder.create(loc, mlirVecTy, cnv); + srcTy = firVecTy; + } else { + srcTy = arg1TyInfo.toFirVectorType(); + } + + trg = builder.createConvert(loc, builder.getRefType(srcTy), addr); + + // TODO: handle element order + + src = builder.createConvert(loc, srcTy, cnv); + break; + } + case VecOp::Stxv: + src = argBases[0]; + trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()), + addr); + break; + default: + llvm_unreachable("Invalid vector operation for generator"); + } + builder.create(loc, mlir::TypeRange{}, + mlir::ValueRange{src, trg}, + getAlignmentAttr(builder, 1)); +} + } // namespace fir Index: flang/lib/Optimizer/Dialect/FIROps.cpp =================================================================== --- flang/lib/Optimizer/Dialect/FIROps.cpp +++ flang/lib/Optimizer/Dialect/FIROps.cpp @@ -1070,7 +1070,7 @@ return emitOpError("cannot find coordinate with unknown extents"); } if (!(fir::isa_aggregate(eleTy) || fir::isa_complex(eleTy) || - fir::isa_char_string(eleTy))) + fir::isa_char_string(eleTy) || 
eleTy.isInteger(8))) return emitOpError("cannot apply to this element type"); } auto eleTy = fir::dyn_cast_ptrOrBoxEleTy(refTy); @@ -1120,6 +1120,8 @@ } else if (auto t = eleTy.dyn_cast()) { // FIXME: This is the same as the tuple case. return mlir::success(); + } else if (eleTy.isInteger(8)) { + return mlir::success(); } else if (auto t = eleTy.dyn_cast()) { eleTy = t.getElementType(); } else if (auto t = eleTy.dyn_cast()) { Index: flang/module/__ppc_intrinsics.f90 =================================================================== --- flang/module/__ppc_intrinsics.f90 +++ flang/module/__ppc_intrinsics.f90 @@ -266,6 +266,81 @@ #undef ELEM_FUNC_VUVUVUVU #undef ELEM_FUNC_VIVIVIVU +!! ================ 3 argument subroutine interfaces ================================= +! subroutine(vector(i), i, vector(i)) +#define SUB_VIIVI(VKIND) \ + pure subroutine sub_vi##VKIND##ivi##VKIND(arg1, arg2, arg3); \ + vector(integer(VKIND)), intent(in) :: arg1; \ + integer(8), intent(in) :: arg2; \ + !dir$ ignore_tkr(k) arg2; \ + vector(integer(VKIND)), intent(in) :: arg3; \ + !dir$ ignore_tkr(r) arg3; \ + end subroutine ; + +! subroutine(vector(u), i, vector(u)) +#define SUB_VUIVU(VKIND) \ + pure subroutine sub_vu##VKIND##ivu##VKIND(arg1, arg2, arg3); \ + vector(unsigned(VKIND)), intent(in) :: arg1; \ + integer(8), intent(in) :: arg2; \ + !dir$ ignore_tkr(k) arg2; \ + vector(unsigned(VKIND)), intent(in) :: arg3; \ + !dir$ ignore_tkr(r) arg3; \ + end subroutine ; + +! subroutine(vector(r), i, vector(r)) +#define SUB_VRIVR(VKIND) \ + pure subroutine sub_vr##VKIND##ivr##VKIND(arg1, arg2, arg3); \ + vector(real(VKIND)), intent(in) :: arg1; \ + integer(8), intent(in) :: arg2; \ + !dir$ ignore_tkr(k) arg2; \ + vector(real(VKIND)), intent(in) :: arg3; \ + !dir$ ignore_tkr(r) arg3; \ + end subroutine ; + +! 
subroutine(vector(i), i, i) +#define SUB_VIII(VKIND) \ + pure subroutine sub_vi##VKIND##ii##VKIND(arg1, arg2, arg3); \ + vector(integer(VKIND)), intent(in) :: arg1; \ + integer(8), intent(in) :: arg2; \ + !dir$ ignore_tkr(k) arg2; \ + integer(VKIND), intent(out) :: arg3; \ + !dir$ ignore_tkr(r) arg3; \ + end subroutine ; + +! subroutine(vector(u), i, i) +#define SUB_VUII(VKIND) \ + pure subroutine sub_vu##VKIND##ii##VKIND(arg1, arg2, arg3); \ + vector(unsigned(VKIND)), intent(in) :: arg1; \ + integer(8), intent(in) :: arg2; \ + !dir$ ignore_tkr(k) arg2; \ + integer(VKIND), intent(out) :: arg3; \ + !dir$ ignore_tkr(r) arg3; \ + end subroutine ; + +! subroutine(vector(r), i, r) +#define SUB_VRIR(VKIND) \ + pure subroutine sub_vr##VKIND##ir##VKIND(arg1, arg2, arg3); \ + vector(real(VKIND)), intent(in) :: arg1; \ + integer(8), intent(in) :: arg2; \ + !dir$ ignore_tkr(k) arg2; \ + real(VKIND), intent(out) :: arg3; \ + !dir$ ignore_tkr(r) arg3; \ + end subroutine ; + + SUB_VIIVI(1) SUB_VIIVI(2) SUB_VIIVI(4) SUB_VIIVI(8) + SUB_VUIVU(1) SUB_VUIVU(2) SUB_VUIVU(4) SUB_VUIVU(8) + SUB_VRIVR(4) SUB_VRIVR(8) + SUB_VIII(1) SUB_VIII(2) SUB_VIII(4) SUB_VIII(8) + SUB_VUII(1) SUB_VUII(2) SUB_VUII(4) SUB_VUII(8) + SUB_VRIR(4) SUB_VRIR(8) + +#undef SUB_VRIR +#undef SUB_VUII +#undef SUB_VIII +#undef SUB_VRIVR +#undef SUB_VUIVU +#undef SUB_VIIVI + end interface procedure(func_r4r4r4r4) :: __ppc_fmadd_r4 @@ -908,4 +983,155 @@ #undef VR_VU_I #undef VR_VI_I +!-------------------------------------------------- +! subroutine(vector, integer, vector/integer/real) +!-------------------------------------------------- +! 'i0' stands for the integer argument being ignored via +! the `ignore_tkr' directive. 
+#define SUB_VI_I_VI(NAME, VKIND) __ppc_##NAME##_vi##VKIND##i0vi##VKIND +#define SUB_VU_I_VU(NAME, VKIND) __ppc_##NAME##_vu##VKIND##i0vu##VKIND +#define SUB_VR_I_VR(NAME, VKIND) __ppc_##NAME##_vr##VKIND##i0vr##VKIND +#define SUB_VI_I_I(NAME, VKIND) __ppc_##NAME##_vi##VKIND##i0i##VKIND +#define SUB_VU_I_I(NAME, VKIND) __ppc_##NAME##_vu##VKIND##i0u##VKIND +#define SUB_VR_I_R(NAME, VKIND) __ppc_##NAME##_vr##VKIND##i0r##VKIND + +#define VEC_SUB_VI_I_VI(NAME, VKIND) \ + procedure(sub_vi##VKIND##ivi##VKIND) :: SUB_VI_I_VI(NAME, VKIND); +#define VEC_SUB_VU_I_VU(NAME, VKIND) \ + procedure(sub_vu##VKIND##ivu##VKIND) :: SUB_VU_I_VU(NAME, VKIND); +#define VEC_SUB_VR_I_VR(NAME, VKIND) \ + procedure(sub_vr##VKIND##ivr##VKIND) :: SUB_VR_I_VR(NAME, VKIND); +#define VEC_SUB_VI_I_I(NAME, VKIND) \ + procedure(sub_vi##VKIND##ii##VKIND) :: SUB_VI_I_I(NAME, VKIND); +#define VEC_SUB_VU_I_I(NAME, VKIND) \ + procedure(sub_vu##VKIND##ii##VKIND) :: SUB_VU_I_I(NAME, VKIND); +#define VEC_SUB_VR_I_R(NAME, VKIND) \ + procedure(sub_vr##VKIND##ir##VKIND) :: SUB_VR_I_R(NAME, VKIND); + +! vec_st + VEC_SUB_VI_I_VI(vec_st,1) VEC_SUB_VI_I_VI(vec_st,2) VEC_SUB_VI_I_VI(vec_st,4) + VEC_SUB_VU_I_VU(vec_st,1) VEC_SUB_VU_I_VU(vec_st,2) VEC_SUB_VU_I_VU(vec_st,4) + VEC_SUB_VR_I_VR(vec_st,4) + VEC_SUB_VI_I_I(vec_st,1) VEC_SUB_VI_I_I(vec_st,2) VEC_SUB_VI_I_I(vec_st,4) + VEC_SUB_VU_I_I(vec_st,1) VEC_SUB_VU_I_I(vec_st,2) VEC_SUB_VU_I_I(vec_st,4) + VEC_SUB_VR_I_R(vec_st,4) + interface vec_st + procedure :: SUB_VI_I_VI(vec_st,1), SUB_VI_I_VI(vec_st,2), SUB_VI_I_VI(vec_st,4) + procedure :: SUB_VU_I_VU(vec_st,1), SUB_VU_I_VU(vec_st,2), SUB_VU_I_VU(vec_st,4) + procedure :: SUB_VR_I_VR(vec_st,4) + procedure :: SUB_VI_I_I(vec_st,1), SUB_VI_I_I(vec_st,2), SUB_VI_I_I(vec_st,4) + procedure :: SUB_VU_I_I(vec_st,1), SUB_VU_I_I(vec_st,2), SUB_VU_I_I(vec_st,4) + procedure :: SUB_VR_I_R(vec_st,4) + end interface vec_st + public :: vec_st + +! 
vec_ste + VEC_SUB_VI_I_I(vec_ste,1) VEC_SUB_VI_I_I(vec_ste,2) VEC_SUB_VI_I_I(vec_ste,4) + VEC_SUB_VU_I_I(vec_ste,1) VEC_SUB_VU_I_I(vec_ste,2) VEC_SUB_VU_I_I(vec_ste,4) + VEC_SUB_VR_I_R(vec_ste,4) + interface vec_ste + procedure :: SUB_VI_I_I(vec_ste,1), SUB_VI_I_I(vec_ste,2), SUB_VI_I_I(vec_ste,4) + procedure :: SUB_VU_I_I(vec_ste,1), SUB_VU_I_I(vec_ste,2), SUB_VU_I_I(vec_ste,4) + procedure :: SUB_VR_I_R(vec_ste,4) + end interface vec_ste + public :: vec_ste + +! vec_stxv + VEC_SUB_VI_I_VI(vec_stxv,1) VEC_SUB_VI_I_VI(vec_stxv,2) VEC_SUB_VI_I_VI(vec_stxv,4) VEC_SUB_VI_I_VI(vec_stxv,8) + VEC_SUB_VU_I_VU(vec_stxv,1) VEC_SUB_VU_I_VU(vec_stxv,2) VEC_SUB_VU_I_VU(vec_stxv,4) VEC_SUB_VU_I_VU(vec_stxv,8) + VEC_SUB_VR_I_VR(vec_stxv,4) VEC_SUB_VR_I_VR(vec_stxv,8) + VEC_SUB_VI_I_I(vec_stxv,1) VEC_SUB_VI_I_I(vec_stxv,2) VEC_SUB_VI_I_I(vec_stxv,4) VEC_SUB_VI_I_I(vec_stxv,8) + VEC_SUB_VU_I_I(vec_stxv,1) VEC_SUB_VU_I_I(vec_stxv,2) VEC_SUB_VU_I_I(vec_stxv,4) VEC_SUB_VU_I_I(vec_stxv,8) + VEC_SUB_VR_I_R(vec_stxv,4) VEC_SUB_VR_I_R(vec_stxv,8) + interface vec_stxv + procedure :: SUB_VI_I_VI(vec_stxv,1), SUB_VI_I_VI(vec_stxv,2), SUB_VI_I_VI(vec_stxv,4), SUB_VI_I_VI(vec_stxv,8) + procedure :: SUB_VU_I_VU(vec_stxv,1), SUB_VU_I_VU(vec_stxv,2), SUB_VU_I_VU(vec_stxv,4), SUB_VU_I_VU(vec_stxv,8) + procedure :: SUB_VR_I_VR(vec_stxv,4), SUB_VR_I_VR(vec_stxv,8) + procedure :: SUB_VI_I_I(vec_stxv,1), SUB_VI_I_I(vec_stxv,2), SUB_VI_I_I(vec_stxv,4), SUB_VI_I_I(vec_stxv,8) + procedure :: SUB_VU_I_I(vec_stxv,1), SUB_VU_I_I(vec_stxv,2), SUB_VU_I_I(vec_stxv,4), SUB_VU_I_I(vec_stxv,8) + procedure :: SUB_VR_I_R(vec_stxv,4), SUB_VR_I_R(vec_stxv,8) + end interface vec_stxv + public :: vec_stxv + +! 
vec_xst + VEC_SUB_VI_I_VI(vec_xst,1) VEC_SUB_VI_I_VI(vec_xst,2) VEC_SUB_VI_I_VI(vec_xst,4) VEC_SUB_VI_I_VI(vec_xst,8) + VEC_SUB_VU_I_VU(vec_xst,1) VEC_SUB_VU_I_VU(vec_xst,2) VEC_SUB_VU_I_VU(vec_xst,4) VEC_SUB_VU_I_VU(vec_xst,8) + VEC_SUB_VR_I_VR(vec_xst,4) VEC_SUB_VR_I_VR(vec_xst,8) + VEC_SUB_VI_I_I(vec_xst,1) VEC_SUB_VI_I_I(vec_xst,2) VEC_SUB_VI_I_I(vec_xst,4) VEC_SUB_VI_I_I(vec_xst,8) + VEC_SUB_VU_I_I(vec_xst,1) VEC_SUB_VU_I_I(vec_xst,2) VEC_SUB_VU_I_I(vec_xst,4) VEC_SUB_VU_I_I(vec_xst,8) + VEC_SUB_VR_I_R(vec_xst,4) VEC_SUB_VR_I_R(vec_xst,8) + interface vec_xst + procedure :: SUB_VI_I_VI(vec_xst,1), SUB_VI_I_VI(vec_xst,2), SUB_VI_I_VI(vec_xst,4), SUB_VI_I_VI(vec_xst,8) + procedure :: SUB_VU_I_VU(vec_xst,1), SUB_VU_I_VU(vec_xst,2), SUB_VU_I_VU(vec_xst,4), SUB_VU_I_VU(vec_xst,8) + procedure :: SUB_VR_I_VR(vec_xst,4), SUB_VR_I_VR(vec_xst,8) + procedure :: SUB_VI_I_I(vec_xst,1), SUB_VI_I_I(vec_xst,2), SUB_VI_I_I(vec_xst,4), SUB_VI_I_I(vec_xst,8) + procedure :: SUB_VU_I_I(vec_xst,1), SUB_VU_I_I(vec_xst,2), SUB_VU_I_I(vec_xst,4), SUB_VU_I_I(vec_xst,8) + procedure :: SUB_VR_I_R(vec_xst,4), SUB_VR_I_R(vec_xst,8) + end interface vec_xst + public :: vec_xst + +! 
vec_xst_be + VEC_SUB_VI_I_VI(vec_xst_be,1) VEC_SUB_VI_I_VI(vec_xst_be,2) VEC_SUB_VI_I_VI(vec_xst_be,4) VEC_SUB_VI_I_VI(vec_xst_be,8) + VEC_SUB_VU_I_VU(vec_xst_be,1) VEC_SUB_VU_I_VU(vec_xst_be,2) VEC_SUB_VU_I_VU(vec_xst_be,4) VEC_SUB_VU_I_VU(vec_xst_be,8) + VEC_SUB_VR_I_VR(vec_xst_be,4) VEC_SUB_VR_I_VR(vec_xst_be,8) + VEC_SUB_VI_I_I(vec_xst_be,1) VEC_SUB_VI_I_I(vec_xst_be,2) VEC_SUB_VI_I_I(vec_xst_be,4) VEC_SUB_VI_I_I(vec_xst_be,8) + VEC_SUB_VU_I_I(vec_xst_be,1) VEC_SUB_VU_I_I(vec_xst_be,2) VEC_SUB_VU_I_I(vec_xst_be,4) VEC_SUB_VU_I_I(vec_xst_be,8) + VEC_SUB_VR_I_R(vec_xst_be,4) VEC_SUB_VR_I_R(vec_xst_be,8) + interface vec_xst_be + procedure :: SUB_VI_I_VI(vec_xst_be,1), SUB_VI_I_VI(vec_xst_be,2), SUB_VI_I_VI(vec_xst_be,4), SUB_VI_I_VI(vec_xst_be,8) + procedure :: SUB_VU_I_VU(vec_xst_be,1), SUB_VU_I_VU(vec_xst_be,2), SUB_VU_I_VU(vec_xst_be,4), SUB_VU_I_VU(vec_xst_be,8) + procedure :: SUB_VR_I_VR(vec_xst_be,4), SUB_VR_I_VR(vec_xst_be,8) + procedure :: SUB_VI_I_I(vec_xst_be,1), SUB_VI_I_I(vec_xst_be,2), SUB_VI_I_I(vec_xst_be,4), SUB_VI_I_I(vec_xst_be,8) + procedure :: SUB_VU_I_I(vec_xst_be,1), SUB_VU_I_I(vec_xst_be,2), SUB_VU_I_I(vec_xst_be,4), SUB_VU_I_I(vec_xst_be,8) + procedure :: SUB_VR_I_R(vec_xst_be,4), SUB_VR_I_R(vec_xst_be,8) + end interface vec_xst_be + public :: vec_xst_be + +! 
vec_xstd2 + VEC_SUB_VI_I_VI(vec_xstd2_,1) VEC_SUB_VI_I_VI(vec_xstd2_,2) VEC_SUB_VI_I_VI(vec_xstd2_,4) VEC_SUB_VI_I_VI(vec_xstd2_,8) + VEC_SUB_VU_I_VU(vec_xstd2_,1) VEC_SUB_VU_I_VU(vec_xstd2_,2) VEC_SUB_VU_I_VU(vec_xstd2_,4) VEC_SUB_VU_I_VU(vec_xstd2_,8) + VEC_SUB_VR_I_VR(vec_xstd2_,4) VEC_SUB_VR_I_VR(vec_xstd2_,8) + VEC_SUB_VI_I_I(vec_xstd2_,1) VEC_SUB_VI_I_I(vec_xstd2_,2) VEC_SUB_VI_I_I(vec_xstd2_,4) VEC_SUB_VI_I_I(vec_xstd2_,8) + VEC_SUB_VU_I_I(vec_xstd2_,1) VEC_SUB_VU_I_I(vec_xstd2_,2) VEC_SUB_VU_I_I(vec_xstd2_,4) VEC_SUB_VU_I_I(vec_xstd2_,8) + VEC_SUB_VR_I_R(vec_xstd2_,4) VEC_SUB_VR_I_R(vec_xstd2_,8) + interface vec_xstd2 + procedure :: SUB_VI_I_VI(vec_xstd2_,1), SUB_VI_I_VI(vec_xstd2_,2), SUB_VI_I_VI(vec_xstd2_,4), SUB_VI_I_VI(vec_xstd2_,8) + procedure :: SUB_VU_I_VU(vec_xstd2_,1), SUB_VU_I_VU(vec_xstd2_,2), SUB_VU_I_VU(vec_xstd2_,4), SUB_VU_I_VU(vec_xstd2_,8) + procedure :: SUB_VR_I_VR(vec_xstd2_,4), SUB_VR_I_VR(vec_xstd2_,8) + procedure :: SUB_VI_I_I(vec_xstd2_,1), SUB_VI_I_I(vec_xstd2_,2), SUB_VI_I_I(vec_xstd2_,4), SUB_VI_I_I(vec_xstd2_,8) + procedure :: SUB_VU_I_I(vec_xstd2_,1), SUB_VU_I_I(vec_xstd2_,2), SUB_VU_I_I(vec_xstd2_,4), SUB_VU_I_I(vec_xstd2_,8) + procedure :: SUB_VR_I_R(vec_xstd2_,4), SUB_VR_I_R(vec_xstd2_,8) + end interface vec_xstd2 + public :: vec_xstd2 + +! 
vec_xstw4 + VEC_SUB_VI_I_VI(vec_xstw4_,1) VEC_SUB_VI_I_VI(vec_xstw4_,2) VEC_SUB_VI_I_VI(vec_xstw4_,4) + VEC_SUB_VU_I_VU(vec_xstw4_,1) VEC_SUB_VU_I_VU(vec_xstw4_,2) VEC_SUB_VU_I_VU(vec_xstw4_,4) + VEC_SUB_VR_I_VR(vec_xstw4_,4) + VEC_SUB_VI_I_I(vec_xstw4_,1) VEC_SUB_VI_I_I(vec_xstw4_,2) VEC_SUB_VI_I_I(vec_xstw4_,4) + VEC_SUB_VU_I_I(vec_xstw4_,1) VEC_SUB_VU_I_I(vec_xstw4_,2) VEC_SUB_VU_I_I(vec_xstw4_,4) + VEC_SUB_VR_I_R(vec_xstw4_,4) + interface vec_xstw4 + procedure :: SUB_VI_I_VI(vec_xstw4_,1), SUB_VI_I_VI(vec_xstw4_,2), SUB_VI_I_VI(vec_xstw4_,4) + procedure :: SUB_VU_I_VU(vec_xstw4_,1), SUB_VU_I_VU(vec_xstw4_,2), SUB_VU_I_VU(vec_xstw4_,4) + procedure :: SUB_VR_I_VR(vec_xstw4_,4) + procedure :: SUB_VI_I_I(vec_xstw4_,1), SUB_VI_I_I(vec_xstw4_,2), SUB_VI_I_I(vec_xstw4_,4) + procedure :: SUB_VU_I_I(vec_xstw4_,1), SUB_VU_I_I(vec_xstw4_,2), SUB_VU_I_I(vec_xstw4_,4) + procedure :: SUB_VR_I_R(vec_xstw4_,4) + end interface vec_xstw4 + public :: vec_xstw4 + +#undef VEC_SUB_VI_I_VI +#undef VEC_SUB_VU_I_VU +#undef VEC_SUB_VR_I_VR +#undef VEC_SUB_VI_I_I +#undef VEC_SUB_VU_I_I +#undef VEC_SUB_VR_I_R +#undef SUB_VI_I_VI +#undef SUB_VU_I_VU +#undef SUB_VR_I_VR +#undef SUB_VI_I_I +#undef SUB_VU_I_I +#undef SUB_VR_I_R + end module __ppc_intrinsics Index: flang/test/Lower/PowerPC/ppc-vec-store.f90 =================================================================== --- /dev/null +++ flang/test/Lower/PowerPC/ppc-vec-store.f90 @@ -0,0 +1,1207 @@ +! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s +! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s +! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s +! REQUIRES: target=powerpc{{.*}} + +!---------------------- +! vec_st +!---------------------- + +! CHECK-LABEL: vec_st_vi1i2vi1 +subroutine vec_st_vi1i2vi1(arg1, arg2, arg3) + vector(integer(1)) :: arg1, arg3 + integer(2) :: arg2 + call vec_st(arg1, arg2, arg3) + +! 
CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i16) -> !fir.ref> +! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: %[[bcArg1:.*]] = vector.bitcast %[[cnvArg1]] : vector<16xi8> to vector<4xi32> +! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) fastmath : (vector<4xi32>, !fir.ref>) -> () + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr> to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr, i16) -> !llvm.ptr +! CHECK-LLVMIR: %[[bcArg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32> +! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, !llvm.ptr) -> () + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2 +! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5 +! CHECK: %[[bcArg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32> +! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[bcArg1]], ptr %[[arg3]]) +end subroutine vec_st_vi1i2vi1 + +! CHECK-LABEL: vec_st_vi2i2vi2 +subroutine vec_st_vi2i2vi2(arg1, arg2, arg3) + vector(integer(2)) :: arg1, arg3 + integer(2) :: arg2 + call vec_st(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i16) -> !fir.ref> +! 
CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[bcArg1:.*]] = vector.bitcast %[[cnvArg1]] : vector<8xi16> to vector<4xi32> +! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) fastmath : (vector<4xi32>, !fir.ref>) -> () + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr> to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr, i16) -> !llvm.ptr +! CHECK-LLVMIR: %[[bcArg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32> +! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, !llvm.ptr) -> () + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2 +! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5 +! CHECK: %[[bcArg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32> +! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[bcArg1]], ptr %[[arg3]]) +end subroutine vec_st_vi2i2vi2 + +! CHECK-LABEL: vec_st_vi4i2vi4 +subroutine vec_st_vi4i2vi4(arg1, arg2, arg3) + vector(integer(4)) :: arg1, arg3 + integer(2) :: arg2 + call vec_st(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i16) -> !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[varg1]], %[[addr]]) fastmath : (vector<4xi32>, !fir.ref>) -> () + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! 
CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr> to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr, i16) -> !llvm.ptr +! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, !llvm.ptr) -> () + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2 +! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5 +! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[arg1]], ptr %[[arg3]]) +end subroutine vec_st_vi4i2vi4 + +! CHECK-LABEL: vec_st_vu1i4vu1 +subroutine vec_st_vu1i4vu1(arg1, arg2, arg3) + vector(unsigned(1)) :: arg1, arg3 + integer(4) :: arg2 + call vec_st(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[bcArg1:.*]] = vector.bitcast %[[cnvArg1]] : vector<16xi8> to vector<4xi32> +! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) fastmath : (vector<4xi32>, !fir.ref>) -> () + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr> to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: %[[bcArg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32> +! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, !llvm.ptr) -> () + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! 
CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4 +! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5 +! CHECK: %[[bcArg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32> +! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[bcArg1]], ptr %[[arg3]]) +end subroutine vec_st_vu1i4vu1 + +! CHECK-LABEL: vec_st_vu2i4vu2 +subroutine vec_st_vu2i4vu2(arg1, arg2, arg3) + vector(unsigned(2)) :: arg1, arg3 + integer(4) :: arg2 + call vec_st(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16> +! CHECK-FIR: %[[bcArg1:.*]] = vector.bitcast %[[cnvArg1]] : vector<8xi16> to vector<4xi32> +! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) fastmath : (vector<4xi32>, !fir.ref>) -> () + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr> to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: %[[bcArg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32> +! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, !llvm.ptr) -> () + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4 +! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5 +! CHECK: %[[bcArg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32> +! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[bcArg1]], ptr %[[arg3]]) +end subroutine vec_st_vu2i4vu2 + +! 
CHECK-LABEL: vec_st_vu4i4vu4 +subroutine vec_st_vu4i4vu4(arg1, arg2, arg3) + vector(unsigned(4)) :: arg1, arg3 + integer(4) :: arg2 + call vec_st(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32> +! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[varg1]], %[[addr]]) fastmath : (vector<4xi32>, !fir.ref>) -> () + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr> to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, !llvm.ptr) -> () + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4 +! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5 +! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[arg1]], ptr %[[arg3]]) +end subroutine vec_st_vu4i4vu4 + +! CHECK-LABEL: vec_st_vi4i4via4 +subroutine vec_st_vi4i4via4(arg1, arg2, arg3, i) + vector(integer(4)) :: arg1, arg3(5) + integer(4) :: arg2, i + call vec_st(arg1, arg2, arg3(i)) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[idx:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64 +! CHECK-FIR: %[[cnst:.*]] = arith.constant 1 : i64 +! CHECK-FIR: %[[cnstm1:.*]] = arith.subi %[[idx64]], %[[cnst]] : i64 +! 
CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %arg2, %[[cnstm1]] : (!fir.ref>>, i64) -> !fir.ref> +! CHECK-FIR: %[[ref:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[pos:.*]] = fir.coordinate_of %[[ref]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[varg1]], %[[pos]]) fastmath : (vector<4xi32>, !fir.ref>) -> () + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr +! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr +! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64 +! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64 +! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64 +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr>>, i64) -> !llvm.ptr> +! CHECK-LLVMIR: %[[bc:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr> to !llvm.ptr +! CHECK-LLVMIR: %[[pos:.*]] = llvm.getelementptr %[[bc]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[arg1]], %[[pos]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, !llvm.ptr) -> () + +! CHECK: %5 = load <4 x i32>, ptr %0, align 16 +! CHECK: %6 = load i32, ptr %1, align 4 +! CHECK: %7 = load i32, ptr %3, align 4 +! CHECK: %8 = sext i32 %7 to i64 +! CHECK: %9 = sub i64 %8, 1 +! CHECK: %10 = getelementptr [5 x <4 x i32>], ptr %2, i32 0, i64 %9 +! CHECK: %11 = getelementptr i8, ptr %10, i32 %6 +! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %5, ptr %11) +end subroutine vec_st_vi4i4via4 + +!---------------------- +! vec_ste +!---------------------- + +! CHECK-LABEL: vec_ste_vi1i2i1 +subroutine vec_ste_vi1i2i1(arg1, arg2, arg3) + vector(integer(1)) :: arg1 + integer(2) :: arg2 + integer(1) :: arg3 + call vec_ste(arg1, arg2, arg3) + +! 
CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i16) -> !fir.ref> +! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: fir.call @llvm.ppc.altivec.stvebx(%[[cnvArg1]], %[[addr]]) fastmath : (vector<16xi8>, !fir.ref>) -> () + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3:.*]][%[[arg2]]] : (!llvm.ptr, i16) -> !llvm.ptr +! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvebx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath} : (vector<16xi8>, !llvm.ptr) -> () + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2 +! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5 +! CHECK: call void @llvm.ppc.altivec.stvebx(<16 x i8> %[[arg1]], ptr %[[arg3]]) +end subroutine vec_ste_vi1i2i1 + +! CHECK-LABEL: vec_ste_vi2i2i2 +subroutine vec_ste_vi2i2i2(arg1, arg2, arg3) + vector(integer(2)) :: arg1 + integer(2) :: arg2 + integer(2) :: arg3 + call vec_ste(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i16) -> !fir.ref> +! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: fir.call @llvm.ppc.altivec.stvehx(%[[cnvArg1]], %[[addr]]) fastmath : (vector<8xi16>, !fir.ref>) -> () + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! 
CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr, i16) -> !llvm.ptr +! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvehx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath} : (vector<8xi16>, !llvm.ptr) -> () + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2 +! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5 +! CHECK: call void @llvm.ppc.altivec.stvehx(<8 x i16> %[[arg1]], ptr %[[arg3]]) +end subroutine vec_ste_vi2i2i2 + +! CHECK-LABEL: vec_ste_vi4i2i4 +subroutine vec_ste_vi4i2i4(arg1, arg2, arg3) + vector(integer(4)) :: arg1 + integer(2) :: arg2 + integer(4) :: arg3 + call vec_ste(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i16) -> !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: fir.call @llvm.ppc.altivec.stvewx(%[[varg1]], %[[addr]]) fastmath : (vector<4xi32>, !fir.ref>) -> () + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr, i16) -> !llvm.ptr +! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvewx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, !llvm.ptr) -> () + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2 +! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5 +! 
CHECK: call void @llvm.ppc.altivec.stvewx(<4 x i32> %[[arg1]], ptr %[[arg3]]) +end subroutine vec_ste_vi4i2i4 + +! CHECK-LABEL: vec_ste_vu1i4u1 +subroutine vec_ste_vu1i4u1(arg1, arg2, arg3) + vector(unsigned(1)) :: arg1 + integer(4) :: arg2 + integer(1) :: arg3 + call vec_ste(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: fir.call @llvm.ppc.altivec.stvebx(%[[cnvArg1]], %[[addr]]) fastmath : (vector<16xi8>, !fir.ref>) -> () + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3:.*]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvebx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath} : (vector<16xi8>, !llvm.ptr) -> () + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4 +! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5 +! CHECK: call void @llvm.ppc.altivec.stvebx(<16 x i8> %[[arg1]], ptr %[[arg3]]) +end subroutine vec_ste_vu1i4u1 + +! CHECK-LABEL: vec_ste_vu2i4u2 +subroutine vec_ste_vu2i4u2(arg1, arg2, arg3) + vector(unsigned(2)) :: arg1 + integer(4) :: arg2 + integer(2) :: arg3 + call vec_ste(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! 
CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16> +! CHECK-FIR: fir.call @llvm.ppc.altivec.stvehx(%[[cnvArg1]], %[[addr]]) fastmath : (vector<8xi16>, !fir.ref>) -> () + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvehx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath} : (vector<8xi16>, !llvm.ptr) -> () + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4 +! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5 +! CHECK: call void @llvm.ppc.altivec.stvehx(<8 x i16> %[[arg1]], ptr %[[arg3]]) +end subroutine vec_ste_vu2i4u2 + +! CHECK-LABEL: vec_ste_vu4i4u4 +subroutine vec_ste_vu4i4u4(arg1, arg2, arg3) + vector(unsigned(4)) :: arg1 + integer(4) :: arg2 + integer(4) :: arg3 + call vec_ste(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32> +! CHECK-FIR: fir.call @llvm.ppc.altivec.stvewx(%[[varg1]], %[[addr]]) fastmath : (vector<4xi32>, !fir.ref>) -> () + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! 
CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvewx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, !llvm.ptr) -> () + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4 +! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5 +! CHECK: call void @llvm.ppc.altivec.stvewx(<4 x i32> %[[arg1]], ptr %[[arg3]]) +end subroutine vec_ste_vu4i4u4 + +! CHECK-LABEL: vec_ste_vr4i4r4 +subroutine vec_ste_vr4i4r4(arg1, arg2, arg3) + vector(real(4)) :: arg1 + integer(4) :: arg2 + real(4) :: arg3 + call vec_ste(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref) -> !fir.ref> +! CHECK-FIR: %[[pos:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[bc:.*]] = vector.bitcast %[[cnvArg1]] : vector<4xf32> to vector<4xi32> +! CHECK-FIR: fir.call @llvm.ppc.altivec.stvewx(%[[bc]], %[[pos]]) fastmath : (vector<4xi32>, !fir.ref>) -> () + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr to !llvm.ptr +! CHECK-LLVMIR: %[[pos:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: %[[bc:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<4xi32> +! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvewx(%[[bc]], %[[pos]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, !llvm.ptr) -> () + +! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %0, align 16 +! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4 +! CHECK: %[[pos:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]] +! CHECK: %[[bc:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32> +! 
CHECK: call void @llvm.ppc.altivec.stvewx(<4 x i32> %[[bc]], ptr %[[pos]]) + +end subroutine vec_ste_vr4i4r4 + +! CHECK-LABEL: vec_ste_vi4i4ia4 +subroutine vec_ste_vi4i4ia4(arg1, arg2, arg3, i) + vector(integer(4)) :: arg1 + integer(4) :: arg2, i + integer(4) :: arg3(5) + call vec_ste(arg1, arg2, arg3(i)) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[idx:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64 +! CHECK-FIR: %[[cnst:.*]] = arith.constant 1 : i64 +! CHECK-FIR: %[[cnstm1:.*]] = arith.subi %[[idx64]], %[[cnst]] : i64 +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %arg2, %[[cnstm1]] : (!fir.ref>, i64) -> !fir.ref +! CHECK-FIR: %[[ref:.*]] = fir.convert %[[addr]] : (!fir.ref) -> !fir.ref> +! CHECK-FIR: %[[pos:.*]] = fir.coordinate_of %[[ref]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: fir.call @llvm.ppc.altivec.stvewx(%[[varg1]], %[[pos]]) fastmath : (vector<4xi32>, !fir.ref>) -> () + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr +! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr +! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64 +! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64 +! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64 +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr>, i64) -> !llvm.ptr +! CHECK-LLVMIR: %[[bc:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr +! CHECK-LLVMIR: %[[pos:.*]] = llvm.getelementptr %[[bc]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvewx(%[[arg1]], %[[pos]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, !llvm.ptr) -> () + +! 
CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16 +! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4 +! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4 +! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64 +! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1 +! CHECK: %[[addr:.*]] = getelementptr [5 x i32], ptr %[[arg3:.*]], i32 0, i64 %[[idx64m1]] +! CHECK: %[[pos:.*]] = getelementptr i8, ptr %[[addr]], i32 %[[arg2]] +! CHECK: call void @llvm.ppc.altivec.stvewx(<4 x i32> %[[arg1]], ptr %[[pos]]) +end subroutine vec_ste_vi4i4ia4 + +!---------------------- +! vec_stxv +!---------------------- + +! CHECK-LABEL: vec_stxv_test_vr4i2r4 +subroutine vec_stxv_test_vr4i2r4(arg1, arg2, arg3) + vector(real(4)) :: arg1 + integer(2) :: arg2 + real(4) :: arg3 + call vec_stxv(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3ptr:.*]] = fir.convert %arg2 : (!fir.ref) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3ptr]], %[[arg2]] : (!fir.ref>, i16) -> !fir.ref> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! CHECK-LLVMIR: %[[arg3ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3ptr]][%[[arg2]]] : (!llvm.ptr, i16) -> !llvm.ptr +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2 +! CHECK: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]] +! CHECK: store <4 x float> %[[arg1]], ptr %[[addr]], align 1 +end subroutine vec_stxv_test_vr4i2r4 + +! 
CHECK-LABEL: vec_stxv_test_vi4i8ia4 +subroutine vec_stxv_test_vi4i8ia4(arg1, arg2, arg3, i) + vector(integer(4)) :: arg1 + integer(8) :: arg2 + integer(4) :: arg3(10) + integer(4) :: i + call vec_stxv(arg1, arg2, arg3(i)) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref +! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref +! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64 +! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64 +! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64 +! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %arg2, %[[idx64m1]] : (!fir.ref>, i64) -> !fir.ref +! CHECK-FIR: %[[elemref:.*]] = fir.convert %[[elem]] : (!fir.ref) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemref]], %[[arg2]] : (!fir.ref>, i64) -> !fir.ref> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr +! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr +! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64 +! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64 +! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64 +! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr>, i64) -> !llvm.ptr +! CHECK-LLVMIR: %[[elemref:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemref]][%[[arg2]]] : (!llvm.ptr, i64) -> !llvm.ptr +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16 +! CHECK: %[[arg2:.*]] = load i64, ptr %1, align 8 +! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4 +! 
CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64 +! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1 +! CHECK: %[[elem:.*]] = getelementptr [10 x i32], ptr %2, i32 0, i64 %[[idx64m1]] +! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i64 %6 +! CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1 +end subroutine vec_stxv_test_vi4i8ia4 + +! CHECK-LABEL: vec_stxv_test_vi2i4vi2 +subroutine vec_stxv_test_vi2i4vi2(arg1, arg2, arg3) + vector(integer(2)) :: arg1 + integer(4) :: arg2 + vector(integer(2)) :: arg3 + call vec_stxv(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr> to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16 +! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4 +! CHECK: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]] +! CHECK: store <8 x i16> %[[arg1]], ptr %[[addr]], align 1 +end subroutine vec_stxv_test_vi2i4vi2 + +! CHECK-LABEL: vec_stxv_test_vi4i4vai4 +subroutine vec_stxv_test_vi4i4vai4(arg1, arg2, arg3, i) + vector(integer(4)) :: arg1 + integer(4) :: arg2 + vector(integer(4)) :: arg3(20) + integer(4) :: i + call vec_stxv(arg1, arg2, arg3(i)) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref> +! 
CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref +! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref +! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64 +! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64 +! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64 +! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %[[arg3:.*]], %[[idx64m1]] : (!fir.ref>>, i64) -> !fir.ref> +! CHECK-FIR: %[[elemptr:.*]] = fir.convert %[[elem]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr +! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr +! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64 +! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64 +! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64 +! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %[[arg3:.*]][0, %[[idx64m1]]] : (!llvm.ptr>>, i64) -> !llvm.ptr> +! CHECK-LLVMIR: %[[elemptr:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr> to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemptr]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16 +! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4 +! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4 +! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64 +! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1 +! CHECK: %[[elem:.*]] = getelementptr [20 x <4 x i32>], ptr %2, i32 0, i64 %[[idx64m1]] +! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i32 %[[arg2]] +! 
CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1 +end subroutine vec_stxv_test_vi4i4vai4 + +!---------------------- +! vec_xst +!---------------------- + +! CHECK-LABEL: vec_xst_test_vr4i2r4 +subroutine vec_xst_test_vr4i2r4(arg1, arg2, arg3) + vector(real(4)) :: arg1 + integer(2) :: arg2 + real(4) :: arg3 + call vec_xst(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3ptr:.*]] = fir.convert %arg2 : (!fir.ref) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3ptr]], %[[arg2]] : (!fir.ref>, i16) -> !fir.ref> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! CHECK-LLVMIR: %[[arg3ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3ptr]][%[[arg2]]] : (!llvm.ptr, i16) -> !llvm.ptr +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2 +! CHECK: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]] +! CHECK: store <4 x float> %[[arg1]], ptr %[[addr]], align 1 +end subroutine vec_xst_test_vr4i2r4 + +! CHECK-LABEL: vec_xst_test_vi4i8ia4 +subroutine vec_xst_test_vi4i8ia4(arg1, arg2, arg3, i) + vector(integer(4)) :: arg1 + integer(8) :: arg2 + integer(4) :: arg3(10) + integer(4) :: i + call vec_xst(arg1, arg2, arg3(i)) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref +! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref +! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64 +! 
CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64 +! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64 +! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %arg2, %[[idx64m1]] : (!fir.ref>, i64) -> !fir.ref +! CHECK-FIR: %[[elemref:.*]] = fir.convert %[[elem]] : (!fir.ref) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemref]], %[[arg2]] : (!fir.ref>, i64) -> !fir.ref> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr +! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr +! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64 +! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64 +! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64 +! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr>, i64) -> !llvm.ptr +! CHECK-LLVMIR: %[[elemref:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemref]][%[[arg2]]] : (!llvm.ptr, i64) -> !llvm.ptr +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16 +! CHECK: %[[arg2:.*]] = load i64, ptr %1, align 8 +! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4 +! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64 +! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1 +! CHECK: %[[elem:.*]] = getelementptr [10 x i32], ptr %2, i32 0, i64 %[[idx64m1]] +! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i64 %6 +! CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1 +end subroutine vec_xst_test_vi4i8ia4 + +! 
CHECK-LABEL: vec_xst_test_vi2i4vi2 +subroutine vec_xst_test_vi2i4vi2(arg1, arg2, arg3) + vector(integer(2)) :: arg1 + integer(4) :: arg2 + vector(integer(2)) :: arg3 + call vec_xst(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr> to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16 +! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4 +! CHECK: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]] +! CHECK: store <8 x i16> %[[arg1]], ptr %[[addr]], align 1 +end subroutine vec_xst_test_vi2i4vi2 + +! CHECK-LABEL: vec_xst_test_vi4i4vai4 +subroutine vec_xst_test_vi4i4vai4(arg1, arg2, arg3, i) + vector(integer(4)) :: arg1 + integer(4) :: arg2 + vector(integer(4)) :: arg3(20) + integer(4) :: i + call vec_xst(arg1, arg2, arg3(i)) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref +! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref +! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64 +! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64 +! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64 +! 
CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %[[arg3:.*]], %[[idx64m1]] : (!fir.ref>>, i64) -> !fir.ref> +! CHECK-FIR: %[[elemptr:.*]] = fir.convert %[[elem]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr +! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr +! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64 +! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64 +! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64 +! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %[[arg3:.*]][0, %[[idx64m1]]] : (!llvm.ptr>>, i64) -> !llvm.ptr> +! CHECK-LLVMIR: %[[elemptr:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr> to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemptr]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16 +! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4 +! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4 +! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64 +! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1 +! CHECK: %[[elem:.*]] = getelementptr [20 x <4 x i32>], ptr %2, i32 0, i64 %[[idx64m1]] +! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i32 %[[arg2]] +! CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1 +end subroutine vec_xst_test_vi4i4vai4 + +!---------------------- +! vec_xst_be +!---------------------- + +! 
CHECK-LABEL: vec_xst_be_test_vr4i2r4 +subroutine vec_xst_be_test_vr4i2r4(arg1, arg2, arg3) + vector(real(4)) :: arg1 + integer(2) :: arg2 + real(4) :: arg3 + call vec_xst_be(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3ptr:.*]] = fir.convert %arg2 : (!fir.ref) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3ptr]], %[[arg2]] : (!fir.ref>, i16) -> !fir.ref> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[undef:.*]] = fir.undefined vector<4xf32> +! CHECK-FIR: %[[shf:.*]] = vector.shuffle %[[varg1]], %[[undef]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32> +! CHECK-FIR: %[[fvarg1:.*]] = fir.convert %[[shf]] : (vector<4xf32>) -> !fir.vector<4:f32> +! CHECK-FIR: fir.store %[[fvarg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! CHECK-LLVMIR: %[[arg3ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3ptr]][%[[arg2]]] : (!llvm.ptr, i16) -> !llvm.ptr +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xf32> +! CHECK-LLVMIR: %[[shf:.*]] = llvm.shufflevector %[[arg1]], %[[undef]] [3, 2, 1, 0] : vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[shf]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2 +! CHECK: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]] +! CHECK: %[[shf:.*]] = shufflevector <4 x float> %[[arg1]], <4 x float> undef, <4 x i32> +! CHECK: store <4 x float> %[[shf]], ptr %[[addr]], align 1 +end subroutine vec_xst_be_test_vr4i2r4 + +! 
CHECK-LABEL: vec_xst_be_test_vi4i8ia4 +subroutine vec_xst_be_test_vi4i8ia4(arg1, arg2, arg3, i) + vector(integer(4)) :: arg1 + integer(8) :: arg2 + integer(4) :: arg3(10) + integer(4) :: i + call vec_xst_be(arg1, arg2, arg3(i)) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref +! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref +! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64 +! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64 +! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64 +! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %arg2, %[[idx64m1]] : (!fir.ref>, i64) -> !fir.ref +! CHECK-FIR: %[[elemref:.*]] = fir.convert %[[elem]] : (!fir.ref) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemref]], %[[arg2]] : (!fir.ref>, i64) -> !fir.ref> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[undef:.*]] = fir.undefined vector<4xi32> +! CHECK-FIR: %[[shf:.*]] = vector.shuffle %[[varg1]], %[[undef]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32> +! CHECK-FIR: %[[src:.*]] = fir.convert %[[shf]] : (vector<4xi32>) -> !fir.vector<4:i32> +! CHECK-FIR: fir.store %[[src]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr +! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr +! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64 +! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64 +! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64 +! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr>, i64) -> !llvm.ptr +! CHECK-LLVMIR: %[[elemref:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr to !llvm.ptr +! 
CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemref]][%[[arg2]]] : (!llvm.ptr, i64) -> !llvm.ptr +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xi32> +! CHECK-LLVMIR: %[[src:.*]] = llvm.shufflevector %[[arg1]], %[[undef]] [3, 2, 1, 0] : vector<4xi32> +! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16 +! CHECK: %[[arg2:.*]] = load i64, ptr %1, align 8 +! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4 +! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64 +! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1 +! CHECK: %[[elem:.*]] = getelementptr [10 x i32], ptr %2, i32 0, i64 %[[idx64m1]] +! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i64 %6 +! CHECK: %[[src:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> undef, <4 x i32> +! CHECK: store <4 x i32> %[[src]], ptr %[[trg]], align 1 +end subroutine vec_xst_be_test_vi4i8ia4 + +! CHECK-LABEL: vec_xst_be_test_vi2i4vi2 +subroutine vec_xst_be_test_vi2i4vi2(arg1, arg2, arg3) + vector(integer(2)) :: arg1 + integer(4) :: arg2 + vector(integer(2)) :: arg3 + call vec_xst_be(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[undef:.*]] = fir.undefined vector<8xi16> +! CHECK-FIR: %[[shf:.*]] = vector.shuffle %[[varg1]], %[[undef]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16> +! CHECK-FIR: %[[src:.*]] = fir.convert %[[shf]] : (vector<8xi16>) -> !fir.vector<8:i16> +! 
CHECK-FIR: fir.store %[[src]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr> to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: %[[undef:.*]] = llvm.mlir.undef : vector<8xi16> +! CHECK-LLVMIR: %[[src:.*]] = llvm.shufflevector %[[arg1]], %[[undef]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16> +! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16 +! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4 +! CHECK: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]] +! CHECK: %[[src:.*]] = shufflevector <8 x i16> %[[arg1]], <8 x i16> undef, <8 x i32> +! CHECK: store <8 x i16> %[[src]], ptr %[[addr]], align 1 +end subroutine vec_xst_be_test_vi2i4vi2 + +! CHECK-LABEL: vec_xst_be_test_vi4i4vai4 +subroutine vec_xst_be_test_vi4i4vai4(arg1, arg2, arg3, i) + vector(integer(4)) :: arg1 + integer(4) :: arg2 + vector(integer(4)) :: arg3(20) + integer(4) :: i + call vec_xst_be(arg1, arg2, arg3(i)) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref +! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref +! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64 +! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64 +! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64 +! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %[[arg3:.*]], %[[idx64m1]] : (!fir.ref>>, i64) -> !fir.ref> +! CHECK-FIR: %[[elemptr:.*]] = fir.convert %[[elem]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! 
CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[undef:.*]] = fir.undefined vector<4xi32> +! CHECK-FIR: %[[shf:.*]] = vector.shuffle %[[varg1]], %[[undef]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32> +! CHECK-FIR: %[[src:.*]] = fir.convert %[[shf]] : (vector<4xi32>) -> !fir.vector<4:i32> +! CHECK-FIR: fir.store %[[src]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr +! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr +! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64 +! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64 +! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64 +! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %[[arg3:.*]][0, %[[idx64m1]]] : (!llvm.ptr>>, i64) -> !llvm.ptr> +! CHECK-LLVMIR: %[[elemptr:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr> to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemptr]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xi32> +! CHECK-LLVMIR: %[[src:.*]] = llvm.shufflevector %[[arg1]], %[[undef]] [3, 2, 1, 0] : vector<4xi32> +! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16 +! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4 +! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4 +! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64 +! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1 +! CHECK: %[[elem:.*]] = getelementptr [20 x <4 x i32>], ptr %2, i32 0, i64 %[[idx64m1]] +! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i32 %[[arg2]] +! CHECK: %[[src:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> undef, <4 x i32> +! 
CHECK: store <4 x i32> %[[src]], ptr %[[trg]], align 1 +end subroutine vec_xst_be_test_vi4i4vai4 + +!---------------------- +! vec_xstd2 +!---------------------- + +! CHECK-LABEL: vec_xstd2_test_vr4i2r4 +subroutine vec_xstd2_test_vr4i2r4(arg1, arg2, arg3) + vector(real(4)) :: arg1 + integer(2) :: arg2 + real(4) :: arg3 + call vec_xstd2(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3ptr:.*]] = fir.convert %arg2 : (!fir.ref) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3ptr]], %[[arg2]] : (!fir.ref>, i16) -> !fir.ref> +! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[bcsrc:.*]] = vector.bitcast %[[vsrc]] : vector<4xf32> to vector<2xi64> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[bcsrc]] : (vector<2xi64>) -> !fir.vector<2:i64> +! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! CHECK-LLVMIR: %[[arg3ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3ptr]][%[[arg2]]] : (!llvm.ptr, i16) -> !llvm.ptr +! CHECK-LLVMIR: %[[src:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<2xi64> +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2 +! CHECK: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]] +! CHECK: %[[src:.*]] = bitcast <4 x float> %[[arg1]] to <2 x i64> +! CHECK: store <2 x i64> %[[src]], ptr %[[addr]], align 1 +end subroutine vec_xstd2_test_vr4i2r4 + +! 
CHECK-LABEL: vec_xstd2_test_vi4i8ia4 +subroutine vec_xstd2_test_vi4i8ia4(arg1, arg2, arg3, i) + vector(integer(4)) :: arg1 + integer(8) :: arg2 + integer(4) :: arg3(10) + integer(4) :: i + call vec_xstd2(arg1, arg2, arg3(i)) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref +! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref +! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64 +! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64 +! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64 +! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %arg2, %[[idx64m1]] : (!fir.ref>, i64) -> !fir.ref +! CHECK-FIR: %[[elemref:.*]] = fir.convert %[[elem]] : (!fir.ref) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemref]], %[[arg2]] : (!fir.ref>, i64) -> !fir.ref> +! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[bcsrc:.*]] = vector.bitcast %[[vsrc]] : vector<4xi32> to vector<2xi64> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[bcsrc]] : (vector<2xi64>) -> !fir.vector<2:i64> +! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr +! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr +! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64 +! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64 +! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64 +! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr>, i64) -> !llvm.ptr +! CHECK-LLVMIR: %[[elemref:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemref]][%[[arg2]]] : (!llvm.ptr, i64) -> !llvm.ptr +! 
CHECK-LLVMIR: %[[src:.*]] = llvm.bitcast %[[arg1]] : vector<4xi32> to vector<2xi64> +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16 +! CHECK: %[[arg2:.*]] = load i64, ptr %1, align 8 +! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4 +! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64 +! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1 +! CHECK: %[[elem:.*]] = getelementptr [10 x i32], ptr %2, i32 0, i64 %[[idx64m1]] +! The byte offset operand must be the loaded arg2 value. +! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i64 %[[arg2]] +! CHECK: %[[src:.*]] = bitcast <4 x i32> %[[arg1]] to <2 x i64> +! CHECK: store <2 x i64> %[[src]], ptr %[[trg]], align 1 +end subroutine vec_xstd2_test_vi4i8ia4 + +! CHECK-LABEL: vec_xstd2_test_vi2i4vi2 +subroutine vec_xstd2_test_vi2i4vi2(arg1, arg2, arg3) + vector(integer(2)) :: arg1 + integer(4) :: arg2 + vector(integer(2)) :: arg3 + call vec_xstd2(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[bcsrc:.*]] = vector.bitcast %[[vsrc]] : vector<8xi16> to vector<2xi64> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[bcsrc]] : (vector<2xi64>) -> !fir.vector<2:i64> +! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr> to !llvm.ptr +! 
CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: %[[src:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<2xi64> +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16 +! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4 +! CHECK: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]] +! CHECK: %[[src:.*]] = bitcast <8 x i16> %[[arg1]] to <2 x i64> +! CHECK: store <2 x i64> %[[src]], ptr %[[addr]], align 1 +end subroutine vec_xstd2_test_vi2i4vi2 + +! CHECK-LABEL: vec_xstd2_test_vi4i4vai4 +subroutine vec_xstd2_test_vi4i4vai4(arg1, arg2, arg3, i) + vector(integer(4)) :: arg1 + integer(4) :: arg2 + vector(integer(4)) :: arg3(20) + integer(4) :: i + call vec_xstd2(arg1, arg2, arg3(i)) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref +! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref +! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64 +! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64 +! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64 +! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %[[arg3:.*]], %[[idx64m1]] : (!fir.ref>>, i64) -> !fir.ref> +! CHECK-FIR: %[[elemptr:.*]] = fir.convert %[[elem]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[bcsrc:.*]] = vector.bitcast %[[vsrc]] : vector<4xi32> to vector<2xi64> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[bcsrc]] : (vector<2xi64>) -> !fir.vector<2:i64> +! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! 
CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr +! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr +! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64 +! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64 +! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64 +! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %[[arg3:.*]][0, %[[idx64m1]]] : (!llvm.ptr>>, i64) -> !llvm.ptr> +! CHECK-LLVMIR: %[[elemptr:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr> to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemptr]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: %[[src:.*]] = llvm.bitcast %[[arg1]] : vector<4xi32> to vector<2xi64> +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16 +! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4 +! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4 +! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64 +! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1 +! CHECK: %[[elem:.*]] = getelementptr [20 x <4 x i32>], ptr %2, i32 0, i64 %[[idx64m1]] +! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i32 %[[arg2]] +! CHECK: %[[src:.*]] = bitcast <4 x i32> %[[arg1]] to <2 x i64> +! CHECK: store <2 x i64> %[[src]], ptr %[[trg]], align 1 +end subroutine vec_xstd2_test_vi4i4vai4 + +!---------------------- +! vec_xstw4 +!---------------------- + +! CHECK-LABEL: vec_xstw4_test_vr4i2r4 +subroutine vec_xstw4_test_vr4i2r4(arg1, arg2, arg3) + vector(real(4)) :: arg1 + integer(2) :: arg2 + real(4) :: arg3 + call vec_xstw4(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK-FIR: %[[arg3ptr:.*]] = fir.convert %arg2 : (!fir.ref) -> !fir.ref> +! 
CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3ptr]], %[[arg2]] : (!fir.ref>, i16) -> !fir.ref> +! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[vsrc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr +! CHECK-LLVMIR: %[[arg3ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3ptr]][%[[arg2]]] : (!llvm.ptr, i16) -> !llvm.ptr +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2 +! CHECK: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]] +! CHECK: store <4 x float> %[[arg1]], ptr %[[addr]], align 1 +end subroutine vec_xstw4_test_vr4i2r4 + +! CHECK-LABEL: vec_xstw4_test_vi4i8ia4 +subroutine vec_xstw4_test_vi4i8ia4(arg1, arg2, arg3, i) + vector(integer(4)) :: arg1 + integer(8) :: arg2 + integer(4) :: arg3(10) + integer(4) :: i + call vec_xstw4(arg1, arg2, arg3(i)) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref +! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref +! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64 +! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64 +! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64 +! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %arg2, %[[idx64m1]] : (!fir.ref>, i64) -> !fir.ref +! CHECK-FIR: %[[elemref:.*]] = fir.convert %[[elem]] : (!fir.ref) -> !fir.ref> +! 
CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemref]], %[[arg2]] : (!fir.ref>, i64) -> !fir.ref> +! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[vsrc]] : (vector<4xi32>) -> !fir.vector<4:i32> +! The store is byte-aligned, as in the other vec_xstw4 tests. +! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr +! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr +! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64 +! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64 +! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64 +! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr>, i64) -> !llvm.ptr +! CHECK-LLVMIR: %[[elemref:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemref]][%[[arg2]]] : (!llvm.ptr, i64) -> !llvm.ptr +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16 +! CHECK: %[[arg2:.*]] = load i64, ptr %1, align 8 +! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4 +! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64 +! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1 +! CHECK: %[[elem:.*]] = getelementptr [10 x i32], ptr %2, i32 0, i64 %[[idx64m1]] +! The byte offset operand must be the loaded arg2 value. +! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i64 %[[arg2]] +! CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1 +end subroutine vec_xstw4_test_vi4i8ia4 + +! CHECK-LABEL: vec_xstw4_test_vi2i4vi2 +subroutine vec_xstw4_test_vi2i4vi2(arg1, arg2, arg3) + vector(integer(2)) :: arg1 + integer(4) :: arg2 + vector(integer(2)) :: arg3 + call vec_xstw4(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref> +! 
CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref +! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[bcsrc:.*]] = vector.bitcast %[[vsrc]] : vector<8xi16> to vector<4xi32> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[bcsrc]] : (vector<4xi32>) -> !fir.vector<4:i32> +! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr> to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: %[[src:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32> +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16 +! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4 +! CHECK: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]] +! CHECK: %[[src:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32> +! CHECK: store <4 x i32> %[[src]], ptr %[[addr]], align 1 +end subroutine vec_xstw4_test_vi2i4vi2 + +! CHECK-LABEL: vec_xstw4_test_vi4i4vai4 +subroutine vec_xstw4_test_vi4i4vai4(arg1, arg2, arg3, i) + vector(integer(4)) :: arg1 + integer(4) :: arg2 + vector(integer(4)) :: arg3(20) + integer(4) :: i + call vec_xstw4(arg1, arg2, arg3(i)) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref +! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref +! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64 +! 
CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64 +! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64 +! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %[[arg3:.*]], %[[idx64m1]] : (!fir.ref>>, i64) -> !fir.ref> +! CHECK-FIR: %[[elemptr:.*]] = fir.convert %[[elem]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref>, i32) -> !fir.ref> +! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref>) -> !fir.ref> +! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[vsrc]] : (vector<4xi32>) -> !fir.vector<4:i32> +! The store is byte-aligned, as in the other vec_xstw4 tests. +! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr +! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr +! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64 +! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64 +! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64 +! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %[[arg3:.*]][0, %[[idx64m1]]] : (!llvm.ptr>>, i64) -> !llvm.ptr> +! CHECK-LLVMIR: %[[elemptr:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr> to !llvm.ptr +! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemptr]][%[[arg2]]] : (!llvm.ptr, i32) -> !llvm.ptr +! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr to !llvm.ptr> +! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]] + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16 +! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4 +! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4 +! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64 +! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1 +! CHECK: %[[elem:.*]] = getelementptr [20 x <4 x i32>], ptr %2, i32 0, i64 %[[idx64m1]] +! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i32 %[[arg2]] +! 
CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1 +end subroutine vec_xstw4_test_vi4i4vai4 +