diff --git a/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h b/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h --- a/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h @@ -29,9 +29,13 @@ Convert, Ctf, Cvf, - Nmadd, + Mergeh, + Mergel, Msub, Mul, + Nmadd, + Perm, + Permi, Sel, Sl, Sld, @@ -167,6 +171,20 @@ fir::ExtendedValue genVecAnyCompare(mlir::Type resultType, llvm::ArrayRef args); + fir::ExtendedValue genVecExtract(mlir::Type resultType, + llvm::ArrayRef args); + + fir::ExtendedValue genVecInsert(mlir::Type resultType, + llvm::ArrayRef args); + + template + fir::ExtendedValue genVecMerge(mlir::Type resultType, + llvm::ArrayRef args); + + template + fir::ExtendedValue genVecPerm(mlir::Type resultType, + llvm::ArrayRef args); + template fir::ExtendedValue genVecNmaddMsub(mlir::Type resultType, llvm::ArrayRef args); diff --git a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp --- a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp @@ -121,6 +121,24 @@ &PI::genVecConvert), {{{"arg1", asValue}}}, /*isElemental=*/true}, + {"__ppc_vec_extract", + static_cast(&PI::genVecExtract), + {{{"arg1", asValue}, {"arg2", asValue}}}, + /*isElemental=*/true}, + {"__ppc_vec_insert", + static_cast(&PI::genVecInsert), + {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, + /*isElemental=*/true}, + {"__ppc_vec_mergeh", + static_cast( + &PI::genVecMerge), + {{{"arg1", asValue}, {"arg2", asValue}}}, + /*isElemental=*/true}, + {"__ppc_vec_mergel", + static_cast( + &PI::genVecMerge), + {{{"arg1", asValue}, {"arg2", asValue}}}, + /*isElemental=*/true}, {"__ppc_vec_msub", static_cast( &PI::genVecNmaddMsub), @@ -136,6 +154,16 @@ &PI::genVecNmaddMsub), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, /*isElemental=*/true}, + {"__ppc_vec_perm", + static_cast( + &PI::genVecPerm), + 
{{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, + /*isElemental=*/true}, + {"__ppc_vec_permi", + static_cast( + &PI::genVecPerm), + {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, + /*isElemental=*/true}, {"__ppc_vec_sel", static_cast(&PI::genVecSel), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, @@ -1066,6 +1094,150 @@ } } +static mlir::Value convertVectorElementOrder(fir::FirOpBuilder &builder, + mlir::Location loc, + VecTypeInfo vecInfo, + mlir::Value idx) { + mlir::Value numSub1{ + builder.createIntegerConstant(loc, idx.getType(), vecInfo.len - 1)}; + return builder.create(loc, idx.getType(), numSub1, idx); +} + +// VEC_EXTRACT +fir::ExtendedValue +PPCIntrinsicLibrary::genVecExtract(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 2); + auto argBases{getBasesForArgs(args)}; + auto argTypes{getTypesForArgs(argBases)}; + auto vecTyInfo{getVecTypeFromFir(argBases[0])}; + + auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())}; + auto varg0{builder.createConvert(loc, mlirTy, argBases[0])}; + + // arg2 modulo the number of elements in arg1 to determine the element + // position + auto numEle{builder.createIntegerConstant(loc, argTypes[1], vecTyInfo.len)}; + mlir::Value uremOp{ + builder.create(loc, argBases[1], numEle)}; + + if (!isNativeVecElemOrderOnLE()) + uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp); + + return builder.create(loc, varg0, uremOp); +} + +// VEC_INSERT +fir::ExtendedValue +PPCIntrinsicLibrary::genVecInsert(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 3); + auto argBases{getBasesForArgs(args)}; + auto argTypes{getTypesForArgs(argBases)}; + auto vecTyInfo{getVecTypeFromFir(argBases[1])}; + auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())}; + auto varg1{builder.createConvert(loc, mlirTy, argBases[1])}; + + auto numEle{builder.createIntegerConstant(loc, argTypes[2], vecTyInfo.len)}; + mlir::Value uremOp{ + 
builder.create(loc, argBases[2], numEle)}; + + if (!isNativeVecElemOrderOnLE()) + uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp); + + auto res{builder.create(loc, argBases[0], + varg1, uremOp)}; + return builder.create(loc, vecTyInfo.toFirVectorType(), res); +} + +// VEC_MERGEH, VEC_MERGEL +template +fir::ExtendedValue +PPCIntrinsicLibrary::genVecMerge(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 2); + auto argBases{getBasesForArgs(args)}; + auto vecTyInfo{getVecTypeFromFir(argBases[0])}; + llvm::SmallVector mMask; // native vector element order mask + llvm::SmallVector rMask; // non-native vector element order mask + + switch (vop) { + case VecOp::Mergeh: { + switch (vecTyInfo.len) { + case 2: { + enum { V1 = 0, V2 = 2 }; + mMask = {V1 + 0, V2 + 0}; + rMask = {V2 + 1, V1 + 1}; + break; + } + case 4: { + enum { V1 = 0, V2 = 4 }; + mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1}; + rMask = {V2 + 2, V1 + 2, V2 + 3, V1 + 3}; + break; + } + case 8: { + enum { V1 = 0, V2 = 8 }; + mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1, V1 + 2, V2 + 2, V1 + 3, V2 + 3}; + rMask = {V2 + 4, V1 + 4, V2 + 5, V1 + 5, V2 + 6, V1 + 6, V2 + 7, V1 + 7}; + break; + } + case 16: + mMask = {0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, + 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17}; + rMask = {0x18, 0x08, 0x19, 0x09, 0x1A, 0x0A, 0x1B, 0x0B, + 0x1C, 0x0C, 0x1D, 0x0D, 0x1E, 0x0E, 0x1F, 0x0F}; + break; + default: + llvm_unreachable("unexpected vector length"); + } + break; + } + case VecOp::Mergel: { + switch (vecTyInfo.len) { + case 2: { + enum { V1 = 0, V2 = 2 }; + mMask = {V1 + 1, V2 + 1}; + rMask = {V2 + 0, V1 + 0}; + break; + } + case 4: { + enum { V1 = 0, V2 = 4 }; + mMask = {V1 + 2, V2 + 2, V1 + 3, V2 + 3}; + rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1}; + break; + } + case 8: { + enum { V1 = 0, V2 = 8 }; + mMask = {V1 + 4, V2 + 4, V1 + 5, V2 + 5, V1 + 6, V2 + 6, V1 + 7, V2 + 7}; + rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1, V2 + 2, V1 + 2, V2 + 3, V1 + 3}; 
+ break; + } + case 16: + mMask = {0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, + 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F}; + rMask = {0x10, 0x00, 0x11, 0x01, 0x12, 0x02, 0x13, 0x03, + 0x14, 0x04, 0x15, 0x05, 0x16, 0x06, 0x17, 0x07}; + break; + default: + llvm_unreachable("unexpected vector length"); + } + break; + } + default: + llvm_unreachable("invalid vector operation for generator"); + } + + auto vargs{convertVecArgs(builder, loc, vecTyInfo, argBases)}; + + llvm::SmallVector &mergeMask = + (isBEVecElemOrderOnLE()) ? rMask : mMask; + + auto callOp{builder.create(loc, vargs[0], vargs[1], + mergeMask)}; + return builder.createConvert(loc, resultType, callOp); +} + // VEC_NMADD, VEC_MSUB template fir::ExtendedValue @@ -1112,6 +1284,123 @@ llvm_unreachable("Invalid vector operation for generator"); } +// VEC_PERM, VEC_PERMI +template +fir::ExtendedValue +PPCIntrinsicLibrary::genVecPerm(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 3); + auto context{builder.getContext()}; + auto argBases{getBasesForArgs(args)}; + auto argTypes{getTypesForArgs(argBases)}; + auto vecTyInfo{getVecTypeFromFir(argBases[0])}; + auto mlirTy{vecTyInfo.toMlirVectorType(context)}; + + auto vi32Ty{mlir::VectorType::get(4, mlir::IntegerType::get(context, 32))}; + auto vf64Ty{mlir::VectorType::get(2, mlir::FloatType::getF64(context))}; + + auto mArg0{builder.createConvert(loc, mlirTy, argBases[0])}; + auto mArg1{builder.createConvert(loc, mlirTy, argBases[1])}; + + switch (vop) { + case VecOp::Perm: { + VecTypeInfo maskVecTyInfo{getVecTypeFromFir(argBases[2])}; + auto mlirMaskTy{maskVecTyInfo.toMlirVectorType(context)}; + auto mMask{builder.createConvert(loc, mlirMaskTy, argBases[2])}; + + if (mlirTy != vi32Ty) { + mArg0 = + builder.create(loc, vi32Ty, mArg0).getResult(); + mArg1 = + builder.create(loc, vi32Ty, mArg1).getResult(); + } + + auto funcOp{builder.addNamedFunction( + loc, "llvm.ppc.altivec.vperm", + genFuncType, Ty::IntegerVector<4>, + 
Ty::IntegerVector<4>, Ty::IntegerVector<1>>(context, + builder))}; + + llvm::SmallVector newArgs; + if (isNativeVecElemOrderOnLE()) { + auto i8Ty{mlir::IntegerType::get(context, 8)}; + auto v8Ty{mlir::VectorType::get(16, i8Ty)}; + auto negOne{builder.createIntegerConstant(loc, i8Ty, -1)}; + auto vNegOne{ + builder.create(loc, v8Ty, negOne)}; + + mMask = builder.create(loc, mMask, vNegOne); + newArgs = {mArg1, mArg0, mMask}; + } else { + newArgs = {mArg0, mArg1, mMask}; + } + + auto res{builder.create(loc, funcOp, newArgs).getResult(0)}; + + if (res.getType() != argTypes[0]) { + // fir.call llvm.ppc.altivec.vperm returns !fir.vector + // convert the result back to the original type + res = builder.createConvert(loc, vi32Ty, res); + if (mlirTy != vi32Ty) + res = + builder.create(loc, mlirTy, res).getResult(); + } + return builder.createConvert(loc, resultType, res); + } + case VecOp::Permi: { + // arg3 is a constant + auto constIntOp{ + mlir::dyn_cast(argBases[2].getDefiningOp()) + .getValue() + .dyn_cast_or_null()}; + assert(constIntOp && "expected integer constant argument"); + auto constInt{constIntOp.getInt()}; + // arg1, arg2, and result type share same VecTypeInfo + if (vecTyInfo.isFloat()) { + mArg0 = + builder.create(loc, vf64Ty, mArg0).getResult(); + mArg1 = + builder.create(loc, vf64Ty, mArg1).getResult(); + } + + llvm::SmallVector nMask; // native vector element order mask + llvm::SmallVector rMask; // non-native vector element order mask + enum { V1 = 0, V2 = 2 }; + switch (constInt) { + case 0: + nMask = {V1 + 0, V2 + 0}; + rMask = {V2 + 1, V1 + 1}; + break; + case 1: + nMask = {V1 + 0, V2 + 1}; + rMask = {V2 + 0, V1 + 1}; + break; + case 2: + nMask = {V1 + 1, V2 + 0}; + rMask = {V2 + 1, V1 + 0}; + break; + case 3: + nMask = {V1 + 1, V2 + 1}; + rMask = {V2 + 0, V1 + 0}; + break; + default: + llvm_unreachable("unexpected arg3 value for vec_permi"); + } + + llvm::SmallVector mask = + (isBEVecElemOrderOnLE()) ? 
rMask : nMask; + auto res{builder.create(loc, mArg0, mArg1, mask)}; + if (res.getType() != mlirTy) { + auto cast{builder.create(loc, mlirTy, res)}; + return builder.createConvert(loc, resultType, cast); + } + return builder.createConvert(loc, resultType, res); + } + default: + llvm_unreachable("invalid vector operation for generator"); + } +} + // VEC_SEL fir::ExtendedValue PPCIntrinsicLibrary::genVecSel(mlir::Type resultType, diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -1536,6 +1536,9 @@ if (specific.name().ToString().compare(0, 14, "__ppc_vec_ctf_") == 0) { return CheckArgumentIsConstantExprInRange(actuals, 1, 0, 31, messages); } + if (specific.name().ToString().compare(0, 16, "__ppc_vec_permi_") == 0) { + return CheckArgumentIsConstantExprInRange(actuals, 2, 0, 3, messages); + } return false; } diff --git a/flang/module/__ppc_intrinsics.f90 b/flang/module/__ppc_intrinsics.f90 --- a/flang/module/__ppc_intrinsics.f90 +++ b/flang/module/__ppc_intrinsics.f90 @@ -126,6 +126,14 @@ !dir$ ignore_tkr(k) arg2; \ end function ; +! integer function f(vector(i), i) +#define ELEM_FUNC_IVII(VKIND) \ + elemental integer(VKIND) function elem_func_i##VKIND##vi##VKIND##i(arg1, arg2); \ + vector(integer(VKIND)), intent(in) :: arg1; \ + integer(8), intent(in) :: arg2; \ + !dir$ ignore_tkr(k) arg2; \ + end function ; + ! vector(r) function f(vector(u), i) #define ELEM_FUNC_VRVUI(VKIND) \ elemental vector(real(VKIND)) function elem_func_vr##VKIND##vu##VKIND##i(arg1, arg2); \ @@ -134,6 +142,14 @@ !dir$ ignore_tkr(k) arg2; \ end function ; +! real function f(vector(r), i) +#define ELEM_FUNC_RVRI(VKIND) \ + elemental real(VKIND) function elem_func_r##VKIND##vr##VKIND##i(arg1, arg2); \ + vector(real(VKIND)), intent(in) :: arg1; \ + integer(8), intent(in) :: arg2; \ + !dir$ ignore_tkr(k) arg2; \ + end function ; + ! 
The following macros are specific for the vec_convert(v, mold) intrinsics as ! the argument keywords are different from the other vector intrinsics. ! @@ -167,7 +183,8 @@ FUNC_VEC_CONVERT_VIVIVI(1) FUNC_VEC_CONVERT_VIVIVI(2) FUNC_VEC_CONVERT_VIVIVI(4) FUNC_VEC_CONVERT_VIVIVI(8) FUNC_VEC_CONVERT_VUVIVU(1) FUNC_VEC_CONVERT_VUVIVU(2) FUNC_VEC_CONVERT_VUVIVU(4) FUNC_VEC_CONVERT_VUVIVU(8) FUNC_VEC_CONVERT_VRVIVR(4) FUNC_VEC_CONVERT_VRVIVR(8) - + ELEM_FUNC_IVII(1) ELEM_FUNC_IVII(2) ELEM_FUNC_IVII(4) ELEM_FUNC_IVII(8) + ELEM_FUNC_RVRI(4) ELEM_FUNC_RVRI(8) ELEM_FUNC_VIVIVI(1) ELEM_FUNC_VIVIVI(2) ELEM_FUNC_VIVIVI(4) ELEM_FUNC_VIVIVI(8) ELEM_FUNC_VUVIVI(1) ELEM_FUNC_VUVIVI(2) ELEM_FUNC_VUVIVI(4) ELEM_FUNC_VUVIVI(8) ELEM_FUNC_VUVUVU(1) ELEM_FUNC_VUVUVU(2) ELEM_FUNC_VUVUVU(4) ELEM_FUNC_VUVUVU(8) @@ -190,18 +207,20 @@ #undef FUNC_VEC_CONVERT_VRVIVR #undef FUNC_VEC_CONVERT_VUVIVU #undef FUNC_VEC_CONVERT_VIVIVI +#undef ELEM_FUNC_RVRI #undef ELEM_FUNC_VRVUI +#undef ELEM_FUNC_IVII #undef ELEM_FUNC_VRVII -#undef ELEM_FUNC_IVIVI -#undef ELEM_FUNC_IVUVU -#undef ELEM_FUNC_VIVIVU_2 -#undef ELEM_FUNC_VUVUVU_2 -#undef ELEM_FUNC_VRVRVU_2 #undef ELEM_FUNC_IVRVR +#undef ELEM_FUNC_IVUVU +#undef ELEM_FUNC_IVIVI #undef ELEM_FUNC_VUVRVR +#undef ELEM_FUNC_VRVRVU_2 #undef ELEM_FUNC_VRVRVR -#undef ELEM_FUNC_VIVIVU #undef ELEM_FUNC_VUVUVU +#undef ELEM_FUNC_VUVUVU_2 +#undef ELEM_FUNC_VIVIVU +#undef ELEM_FUNC_VIVIVU_2 #undef ELEM_FUNC_VUVIVI #undef ELEM_FUNC_VIVIVI @@ -213,25 +232,28 @@ end function ; ! 
vector(i) function f(vector(i), vector(i), vector(u)) -#define ELEM_FUNC_VIVIVIVU(VKIND) \ - elemental vector(integer(VKIND)) function elem_func_vi##VKIND##vi##VKIND##vi##VKIND##vu##VKIND(arg1, arg2, arg3); \ +#define ELEM_FUNC_VIVIVIVU_2(VKIND, UKIND) \ + elemental vector(integer(VKIND)) function elem_func_vi##VKIND##vi##VKIND##vi##VKIND##vu##UKIND(arg1, arg2, arg3); \ vector(integer(VKIND)), intent(in) :: arg1, arg2; \ - vector(unsigned(VKIND)), intent(in) :: arg3; \ + vector(unsigned(UKIND)), intent(in) :: arg3; \ end function ; +#define ELEM_FUNC_VIVIVIVU(VKIND) ELEM_FUNC_VIVIVIVU_2(VKIND, VKIND) ! vector(u) function f(vector(u), vector(u), vector(u)) -#define ELEM_FUNC_VUVUVUVU(VKIND) \ - elemental vector(unsigned(VKIND)) function elem_func_vu##VKIND##vu##VKIND##vu##VKIND##vu##VKIND(arg1, arg2, arg3); \ - vector(unsigned(VKIND)), intent(in) :: arg1, arg2, arg3; \ +#define ELEM_FUNC_VUVUVUVU_2(VKIND, UKIND) \ + elemental vector(unsigned(VKIND)) function elem_func_vu##VKIND##vu##VKIND##vu##VKIND##vu##UKIND(arg1, arg2, arg3); \ + vector(unsigned(VKIND)), intent(in) :: arg1, arg2; \ + vector(unsigned(UKIND)), intent(in) :: arg3; \ end function ; - +#define ELEM_FUNC_VUVUVUVU(VKIND) ELEM_FUNC_VUVUVUVU_2(VKIND, VKIND) + ! vector(r) function f(vector(r), vector(r), vector(u)) -#define ELEM_FUNC_VRVRVRVU(VKIND) \ - elemental vector(real(VKIND)) function elem_func_vr##VKIND##vr##VKIND##vr##VKIND##vu##VKIND(arg1, arg2, arg3); \ +#define ELEM_FUNC_VRVRVRVU_2(VKIND, UKIND) \ + elemental vector(real(VKIND)) function elem_func_vr##VKIND##vr##VKIND##vr##VKIND##vu##UKIND(arg1, arg2, arg3); \ vector(real(VKIND)), intent(in) :: arg1, arg2; \ - vector(unsigned(VKIND)), intent(in) :: arg3; \ + vector(unsigned(UKIND)), intent(in) :: arg3; \ end function ; - +#define ELEM_FUNC_VRVRVRVU(VKIND) ELEM_FUNC_VRVRVRVU_2(VKIND, VKIND) ! vector(i) function f(vector(i), vector(i), i) #define ELEM_FUNC_VIVIVII(VKIND) \ @@ -257,22 +279,49 @@ !dir$ ignore_tkr(k) arg3; \ end function ; +! 
vector(i) function f(i, vector(i), i) +#define ELEM_FUNC_VIIVII(VKIND) \ + elemental vector(integer(VKIND)) function elem_func_vi##VKIND##i##VKIND##vi##VKIND##i(arg1, arg2, arg3); \ + integer(VKIND), intent(in) :: arg1; \ + vector(integer(VKIND)), intent(in) :: arg2; \ + integer(8), intent(in) :: arg3; \ + !dir$ ignore_tkr(k) arg3; \ + end function ; + +! vector(r) function f(r, vector(r), i) +#define ELEM_FUNC_VRRVRI(VKIND) \ + elemental vector(real(VKIND)) function elem_func_vr##VKIND##r##VKIND##vr##VKIND##i(arg1, arg2, arg3); \ + real(VKIND), intent(in) :: arg1; \ + vector(real(VKIND)), intent(in) :: arg2; \ + integer(8), intent(in) :: arg3; \ + !dir$ ignore_tkr(k) arg3; \ + end function ; + ELEM_FUNC_VIVIVIVU(1) ELEM_FUNC_VIVIVIVU(2) ELEM_FUNC_VIVIVIVU(4) ELEM_FUNC_VIVIVIVU(8) ELEM_FUNC_VUVUVUVU(1) ELEM_FUNC_VUVUVUVU(2) ELEM_FUNC_VUVUVUVU(4) ELEM_FUNC_VUVUVUVU(8) ELEM_FUNC_VRVRVRVU(4) ELEM_FUNC_VRVRVRVU(8) + ELEM_FUNC_VIVIVIVU_2(2,1) ELEM_FUNC_VIVIVIVU_2(4,1) ELEM_FUNC_VIVIVIVU_2(8,1) + ELEM_FUNC_VUVUVUVU_2(2,1) ELEM_FUNC_VUVUVUVU_2(4,1) ELEM_FUNC_VUVUVUVU_2(8,1) + ELEM_FUNC_VRVRVRVU_2(4,1) ELEM_FUNC_VRVRVRVU_2(8,1) + ELEM_FUNC_VIIVII(1) ELEM_FUNC_VIIVII(2) ELEM_FUNC_VIIVII(4) ELEM_FUNC_VIIVII(8) + ELEM_FUNC_VRRVRI(4) ELEM_FUNC_VRRVRI(8) ELEM_FUNC_VRVRVRVR(4) ELEM_FUNC_VRVRVRVR(8) ELEM_FUNC_VIVIVII(1) ELEM_FUNC_VIVIVII(2) ELEM_FUNC_VIVIVII(4) ELEM_FUNC_VIVIVII(8) ELEM_FUNC_VUVUVUI(1) ELEM_FUNC_VUVUVUI(2) ELEM_FUNC_VUVUVUI(4) ELEM_FUNC_VUVUVUI(8) ELEM_FUNC_VRVRVRI(4) ELEM_FUNC_VRVRVRI(8) -#undef ELEM_FUNC_VIVIVII -#undef ELEM_FUNC_VUVUVUI +#undef ELEM_FUNC_VRRVRI +#undef ELEM_FUNC_VIIVII #undef ELEM_FUNC_VRVRVRI -#undef ELEM_FUNC_VRVRVRVR +#undef ELEM_FUNC_VUVUVUI +#undef ELEM_FUNC_VIVIVII #undef ELEM_FUNC_VRVRVRVU -#undef ELEM_FUNC_VRVRVRVR +#undef ELEM_FUNC_VRVRVRVU_2 #undef ELEM_FUNC_VUVUVUVU +#undef ELEM_FUNC_VUVUVUVU_2 #undef ELEM_FUNC_VIVIVIVU +#undef ELEM_FUNC_VIVIVIVU_2 +#undef ELEM_FUNC_VRVRVRVR end interface @@ -623,6 +672,28 @@ end interface vec_max 
public :: vec_max +! vec_mergeh + VEC_VI_VI_VI(vec_mergeh,1) VEC_VI_VI_VI(vec_mergeh,2) VEC_VI_VI_VI(vec_mergeh,4) VEC_VI_VI_VI(vec_mergeh,8) + VEC_VU_VU_VU(vec_mergeh,1) VEC_VU_VU_VU(vec_mergeh,2) VEC_VU_VU_VU(vec_mergeh,4) VEC_VU_VU_VU(vec_mergeh,8) + VEC_VR_VR_VR(vec_mergeh,4) VEC_VR_VR_VR(vec_mergeh,8) + interface vec_mergeh + procedure :: VI_VI_VI(vec_mergeh,1), VI_VI_VI(vec_mergeh,2), VI_VI_VI(vec_mergeh,4), VI_VI_VI(vec_mergeh,8) + procedure :: VU_VU_VU(vec_mergeh,1), VU_VU_VU(vec_mergeh,2), VU_VU_VU(vec_mergeh,4), VU_VU_VU(vec_mergeh,8) + procedure :: VR_VR_VR(vec_mergeh,4), VR_VR_VR(vec_mergeh,8) + end interface vec_mergeh + public :: vec_mergeh + +! vec_mergel + VEC_VI_VI_VI(vec_mergel,1) VEC_VI_VI_VI(vec_mergel,2) VEC_VI_VI_VI(vec_mergel,4) VEC_VI_VI_VI(vec_mergel,8) + VEC_VU_VU_VU(vec_mergel,1) VEC_VU_VU_VU(vec_mergel,2) VEC_VU_VU_VU(vec_mergel,4) VEC_VU_VU_VU(vec_mergel,8) + VEC_VR_VR_VR(vec_mergel,4) VEC_VR_VR_VR(vec_mergel,8) + interface vec_mergel + procedure :: VI_VI_VI(vec_mergel,1), VI_VI_VI(vec_mergel,2), VI_VI_VI(vec_mergel,4), VI_VI_VI(vec_mergel,8) + procedure :: VU_VU_VU(vec_mergel,1), VU_VU_VU(vec_mergel,2), VU_VU_VU(vec_mergel,4), VU_VU_VU(vec_mergel,8) + procedure :: VR_VR_VR(vec_mergel,4), VR_VR_VR(vec_mergel,8) + end interface vec_mergel + public :: vec_mergel + ! vec_min VEC_VI_VI_VI(vec_min,1) VEC_VI_VI_VI(vec_min,2) VEC_VI_VI_VI(vec_min,4) VEC_VI_VI_VI(vec_min,8) VEC_VU_VU_VU(vec_min,1) VEC_VU_VU_VU(vec_min,2) VEC_VU_VU_VU(vec_min,4) VEC_VU_VU_VU(vec_min,8) @@ -771,18 +842,34 @@ ! 
vector function(vector, vector, vector) !----------------------------------------- #define VR_VR_VR_VR(NAME, VKIND) __ppc_##NAME##_vr##VKIND##vr##VKIND##vr##VKIND##vr##VKIND -#define VI_VI_VI_VU(NAME, VKIND) __ppc_##NAME##_vi##VKIND##vi##VKIND##vi##VKIND##vu##VKIND -#define VU_VU_VU_VU(NAME, VKIND) __ppc_##NAME##_vu##VKIND##vu##VKIND##vu##VKIND##vu##VKIND -#define VR_VR_VR_VU(NAME, VKIND) __ppc_##NAME##_vr##VKIND##vr##VKIND##vr##VKIND##vu##VKIND +#define VI_VI_VI_VU_2(NAME, VKIND, UKIND) __ppc_##NAME##_vi##VKIND##vi##VKIND##vi##VKIND##vu##UKIND +#define VI_VI_VI_VU(NAME, VKIND) VI_VI_VI_VU_2(NAME, VKIND, VKIND) +#define VU_VU_VU_VU_2(NAME, VKIND, UKIND) __ppc_##NAME##_vu##VKIND##vu##VKIND##vu##VKIND##vu##UKIND +#define VU_VU_VU_VU(NAME, VKIND) VU_VU_VU_VU_2(NAME, VKIND, VKIND) +#define VR_VR_VR_VU_2(NAME, VKIND, UKIND) __ppc_##NAME##_vr##VKIND##vr##VKIND##vr##VKIND##vu##UKIND +#define VR_VR_VR_VU(NAME, VKIND) VR_VR_VR_VU_2(NAME, VKIND, VKIND) +! i0 indicates "!dir$ ignore_tkr(k) arg3" +#define VI_VI_VI_I(NAME, VKIND) __ppc_##NAME##_vi##VKIND##vi##VKIND##vi##VKIND##i0 +#define VU_VU_VU_I(NAME, VKIND) __ppc_##NAME##_vu##VKIND##vu##VKIND##vu##VKIND##i0 +#define VR_VR_VR_I(NAME, VKIND) __ppc_##NAME##_vr##VKIND##vr##VKIND##vr##VKIND##i0 #define VEC_VR_VR_VR_VR(NAME, VKIND) \ procedure(elem_func_vr##VKIND##vr##VKIND##vr##VKIND##vr##VKIND) :: VR_VR_VR_VR(NAME, VKIND); -#define VEC_VI_VI_VI_VU(NAME, VKIND) \ - procedure(elem_func_vi##VKIND##vi##VKIND##vi##VKIND##vu##VKIND) :: VI_VI_VI_VU(NAME, VKIND); -#define VEC_VU_VU_VU_VU(NAME, VKIND) \ - procedure(elem_func_vu##VKIND##vu##VKIND##vu##VKIND##vu##VKIND) :: VU_VU_VU_VU(NAME, VKIND); -#define VEC_VR_VR_VR_VU(NAME, VKIND) \ - procedure(elem_func_vr##VKIND##vr##VKIND##vr##VKIND##vu##VKIND) :: VR_VR_VR_VU(NAME, VKIND); +#define VEC_VI_VI_VI_VU_2(NAME, VKIND, UKIND) \ + procedure(elem_func_vi##VKIND##vi##VKIND##vi##VKIND##vu##UKIND) :: VI_VI_VI_VU_2(NAME, VKIND, UKIND); +#define VEC_VI_VI_VI_VU(NAME, VKIND) 
VEC_VI_VI_VI_VU_2(NAME, VKIND, VKIND) +#define VEC_VU_VU_VU_VU_2(NAME, VKIND, UKIND) \ + procedure(elem_func_vu##VKIND##vu##VKIND##vu##VKIND##vu##UKIND) :: VU_VU_VU_VU_2(NAME, VKIND, UKIND); +#define VEC_VU_VU_VU_VU(NAME, VKIND) VEC_VU_VU_VU_VU_2(NAME, VKIND, VKIND) +#define VEC_VR_VR_VR_VU_2(NAME, VKIND, UKIND) \ + procedure(elem_func_vr##VKIND##vr##VKIND##vr##VKIND##vu##UKIND) :: VR_VR_VR_VU_2(NAME, VKIND, UKIND); +#define VEC_VR_VR_VR_VU(NAME, VKIND) VEC_VR_VR_VR_VU_2(NAME, VKIND, VKIND) +#define VEC_VI_VI_VI_I(NAME, VKIND) \ + procedure(elem_func_vi##VKIND##vi##VKIND##vi##VKIND##i) :: VI_VI_VI_I(NAME, VKIND); +#define VEC_VU_VU_VU_I(NAME, VKIND) \ + procedure(elem_func_vu##VKIND##vu##VKIND##vu##VKIND##i) :: VU_VU_VU_I(NAME, VKIND); +#define VEC_VR_VR_VR_I(NAME, VKIND) \ + procedure(elem_func_vr##VKIND##vr##VKIND##vr##VKIND##i) :: VR_VR_VR_I(NAME, VKIND); ! vec_madd VEC_VR_VR_VR_VR(vec_madd,4) VEC_VR_VR_VR_VR(vec_madd,8) @@ -812,6 +899,28 @@ end interface vec_nmsub public :: vec_nmsub +! vec_perm + VEC_VI_VI_VI_VU_2(vec_perm,1,1) VEC_VI_VI_VI_VU_2(vec_perm,2,1) VEC_VI_VI_VI_VU_2(vec_perm,4,1) VEC_VI_VI_VI_VU_2(vec_perm,8,1) + VEC_VU_VU_VU_VU_2(vec_perm,1,1) VEC_VU_VU_VU_VU_2(vec_perm,2,1) VEC_VU_VU_VU_VU_2(vec_perm,4,1) VEC_VU_VU_VU_VU_2(vec_perm,8,1) + VEC_VR_VR_VR_VU_2(vec_perm,4,1) VEC_VR_VR_VR_VU_2(vec_perm,8,1) + interface vec_perm + procedure :: VI_VI_VI_VU_2(vec_perm,1,1), VI_VI_VI_VU_2(vec_perm,2,1), VI_VI_VI_VU_2(vec_perm,4,1), VI_VI_VI_VU_2(vec_perm,8,1) + procedure :: VU_VU_VU_VU_2(vec_perm,1,1), VU_VU_VU_VU_2(vec_perm,2,1), VU_VU_VU_VU_2(vec_perm,4,1), VU_VU_VU_VU_2(vec_perm,8,1) + procedure :: VR_VR_VR_VU_2(vec_perm,4,1), VR_VR_VR_VU_2(vec_perm,8,1) + end interface vec_perm + public :: vec_perm + +! 
vec_permi + VEC_VI_VI_VI_I(vec_permi,8) + VEC_VU_VU_VU_I(vec_permi,8) + VEC_VR_VR_VR_I(vec_permi,4) VEC_VR_VR_VR_I(vec_permi,8) + interface vec_permi + procedure :: VI_VI_VI_I(vec_permi,8) + procedure :: VU_VU_VU_I(vec_permi,8) + procedure :: VR_VR_VR_I(vec_permi,4), VR_VR_VR_I(vec_permi,8) + end interface vec_permi + public :: vec_permi + ! vec_sel VEC_VI_VI_VI_VU(vec_sel,1) VEC_VI_VI_VI_VU(vec_sel,2) VEC_VI_VI_VI_VU(vec_sel,4) VEC_VI_VI_VI_VU(vec_sel,8) VEC_VU_VU_VU_VU(vec_sel,1) VEC_VU_VU_VU_VU(vec_sel,2) VEC_VU_VU_VU_VU(vec_sel,4) VEC_VU_VU_VU_VU(vec_sel,8) @@ -823,15 +932,53 @@ end interface vec_sel public :: vec_sel +#undef VEC_VR_VR_VR_I +#undef VEC_VU_VU_VU_I +#undef VEC_VI_VI_VI_I +#undef VEC_VI_VI_VI_VU_2 #undef VEC_VI_VI_VI_VU +#undef VEC_VU_VU_VU_VU_2 #undef VEC_VU_VU_VU_VU +#undef VEC_VR_VR_VR_VU_2 #undef VEC_VR_VR_VR_VU #undef VEC_VR_VR_VR_VR +#undef VR_VR_VR_I +#undef VU_VU_VU_I +#undef VI_VI_VI_I #undef VI_VI_VI_VU +#undef VI_VI_VI_VU_2 #undef VU_VU_VU_VU +#undef VU_VU_VU_VU_2 #undef VR_VR_VR_VU +#undef VR_VR_VR_VU_2 #undef VR_VR_VR_VR +!------------------------------------------ +! vector function(integer, vector, integer) +! vector function(real, vector, integer) +!------------------------------------------ +#define VI_I_VI_I(NAME, VKIND) __ppc_##NAME##_vi##VKIND##i##VKIND##vi##VKIND##i0 +#define VR_R_VR_I(NAME, VKIND) __ppc_##NAME##_vr##VKIND##r##VKIND##vr##VKIND##i0 + +#define VEC_VI_I_VI_I(NAME, VKIND) \ + procedure(elem_func_vi##VKIND##i##VKIND##vi##VKIND##i) :: VI_I_VI_I(NAME, VKIND); +#define VEC_VR_R_VR_I(NAME, VKIND) \ + procedure(elem_func_vr##VKIND##r##VKIND##vr##VKIND##i) :: VR_R_VR_I(NAME, VKIND); + +! 
vec_insert + VEC_VI_I_VI_I(vec_insert,1) VEC_VI_I_VI_I(vec_insert,2) VEC_VI_I_VI_I(vec_insert,4) VEC_VI_I_VI_I(vec_insert,8) + VEC_VR_R_VR_I(vec_insert,4) VEC_VR_R_VR_I(vec_insert,8) + interface vec_insert + procedure :: VI_I_VI_I(vec_insert,1), VI_I_VI_I(vec_insert,2), VI_I_VI_I(vec_insert,4), VI_I_VI_I(vec_insert,8) + procedure :: VR_R_VR_I(vec_insert,4), VR_R_VR_I(vec_insert,8) + end interface vec_insert + public :: vec_insert + +#undef VEC_VR_R_VR_I +#undef VEC_VI_I_VI_I +#undef VR_R_VR_I +#undef VI_I_VI_I + !---------------------------------- ! integer function(vector, vector) !---------------------------------- @@ -864,6 +1011,26 @@ #undef I_VU_VU #undef I_VI_VI +!---------------------------------------- +! integer/real function(vector, integer) +!---------------------------------------- +#define I_VI_I(NAME, VKIND) __ppc_##NAME##_i##VKIND##vi##VKIND##i0 +#define R_VR_I(NAME, VKIND) __ppc_##NAME##_r##VKIND##vr##VKIND##i0 + +#define VEC_I_VI_I(NAME, VKIND) \ + procedure(elem_func_i##VKIND##vi##VKIND##i) :: I_VI_I(NAME, VKIND); +#define VEC_R_VR_I(NAME, VKIND) \ + procedure(elem_func_r##VKIND##vr##VKIND##i) :: R_VR_I(NAME, VKIND); + +! vec_extract + VEC_I_VI_I(vec_extract,1) VEC_I_VI_I(vec_extract,2) VEC_I_VI_I(vec_extract,4) VEC_I_VI_I(vec_extract,8) + VEC_R_VR_I(vec_extract,4) VEC_R_VR_I(vec_extract,8) + interface vec_extract + procedure :: I_VI_I(vec_extract,1), I_VI_I(vec_extract,2), I_VI_I(vec_extract,4), I_VI_I(vec_extract,8) + procedure :: R_VR_I(vec_extract,4), R_VR_I(vec_extract,8) + end interface + public :: vec_extract + !------------------------------------------ ! vector function(vector, vector, integer) !------------------------------------------ diff --git a/flang/test/Lower/PowerPC/ppc-vec-extract-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-extract-elem-order.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/PowerPC/ppc-vec-extract-elem-order.f90 @@ -0,0 +1,53 @@ +! 
RUN: bbc -emit-fir -fno-ppc-native-vector-element-order %s -o - | FileCheck --check-prefixes="FIR" %s +! RUN: %flang_fc1 -emit-llvm -fno-ppc-native-vector-element-order %s -o - | FileCheck --check-prefixes="LLVMIR" %s +! REQUIRES: target=powerpc{{.*}} + +!CHECK-LABEL: vec_extract_testr4i8 +subroutine vec_extract_testr4i8(arg1, arg2, r) + vector(real(4)) :: arg1 + real(4) :: r + integer(8) :: arg2 + r = vec_extract(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! FIR: %[[arg2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32> +! FIR: %[[c:.*]] = arith.constant 4 : i64 +! FIR: %[[urem:.*]] = llvm.urem %[[arg2]], %[[c]] : i64 +! FIR: %[[c2:.*]] = arith.constant 3 : i64 +! FIR: %[[sub:.*]] = llvm.sub %[[c2]], %[[urem]] : i64 +! FIR: %[[ext:.*]] = vector.extractelement %[[varg1]][%[[sub]] : i64] : vector<4xf32> +! FIR: fir.store %[[ext]] to %arg2 : !fir.ref + +! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16 +! LLVMIR: %[[arg2:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! LLVMIR: %[[urem:.*]] = urem i64 %[[arg2]], 4 +! LLVMIR: %[[sub:.*]] = sub i64 3, %[[urem]] +! LLVMIR: %[[r:.*]] = extractelement <4 x float> %[[arg1]], i64 %[[sub]] +! LLVMIR: store float %[[r]], ptr %{{[0-9]}}, align 4 +end subroutine vec_extract_testr4i8 + +!CHECK-LABEL: vec_extract_testi8i1 +subroutine vec_extract_testi8i1(arg1, arg2, r) + vector(integer(8)) :: arg1 + integer(8) :: r + integer(1) :: arg2 + r = vec_extract(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! FIR: %[[arg2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64> +! FIR: %[[c:.*]] = arith.constant 2 : i8 +! FIR: %[[urem:.*]] = llvm.urem %[[arg2]], %[[c]] : i8 +! FIR: %[[c2:.*]] = arith.constant 1 : i8 +! FIR: %[[sub:.*]] = llvm.sub %[[c2]], %[[urem]] : i8 +! 
FIR: %[[ext:.*]] = vector.extractelement %[[varg1]][%[[sub]] : i8] : vector<2xi64> +! FIR: fir.store %[[ext]] to %arg2 : !fir.ref + +! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16 +! LLVMIR: %[[arg2:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! LLVMIR: %[[urem:.*]] = urem i8 %[[arg2]], 2 +! LLVMIR: %[[sub:.*]] = sub i8 1, %[[urem]] +! LLVMIR: %[[r:.*]] = extractelement <2 x i64> %[[arg1]], i8 %[[sub]] +! LLVMIR: store i64 %[[r]], ptr %{{[0-9]}}, align 8 +end subroutine vec_extract_testi8i1 diff --git a/flang/test/Lower/PowerPC/ppc-vec-extract.f90 b/flang/test/Lower/PowerPC/ppc-vec-extract.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/PowerPC/ppc-vec-extract.f90 @@ -0,0 +1,589 @@ +! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s +! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s +! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s +! REQUIRES: target=powerpc{{.*}} + +!------------- +! vec_extract +!------------- +! CHECK-LABEL: vec_extract_testf32 +subroutine vec_extract_testf32(x, i1, i2, i4, i8) + vector(real(4)) :: x + real(4) :: r + integer(1) :: i1 + integer(2) :: i2 + integer(4) :: i4 + integer(8) :: i8 + r = vec_extract(x, i1) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i8 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i8] : vector<4xf32> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i8) : i8 +! 
CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! CHECK: %[[u:.*]] = urem i8 %[[i1]], 4 +! CHECK: %[[r:.*]] = extractelement <4 x float> %[[x]], i8 %[[u]] +! CHECK: store float %[[r]], ptr %{{[0-9]}}, align 4 + + r = vec_extract(x, i2) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i16 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i16] : vector<4xf32> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i16) : i16 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2 +! CHECK: %[[u:.*]] = urem i16 %[[i2]], 4 +! CHECK: %[[r:.*]] = extractelement <4 x float> %[[x]], i16 %[[u]] +! CHECK: store float %[[r]], ptr %{{[0-9]}}, align 4 + + r = vec_extract(x, i4) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i32 +! 
CHECK-FIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i32] : vector<4xf32> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i32) : i32 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4 +! CHECK: %[[u:.*]] = urem i32 %[[i4]], 4 +! CHECK: %[[r:.*]] = extractelement <4 x float> %[[x]], i32 %[[u]] +! CHECK: store float %[[r]], ptr %{{[0-9]}}, align 4 + + r = vec_extract(x, i8) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i64 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i64] : vector<4xf32> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i64) : i64 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! CHECK: %[[u:.*]] = urem i64 %[[i8]], 4 +! 
CHECK: %[[r:.*]] = extractelement <4 x float> %[[x]], i64 %[[u]] +! CHECK: store float %[[r]], ptr %{{[0-9]}}, align 4 +end subroutine vec_extract_testf32 + +! CHECK-LABEL: vec_extract_testf64 +subroutine vec_extract_testf64(x, i1, i2, i4, i8) + vector(real(8)) :: x + real(8) :: r + integer(1) :: i1 + integer(2) :: i2 + integer(4) :: i4 + integer(8) :: i8 + r = vec_extract(x, i1) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i8 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i8] : vector<2xf64> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i8) : i8 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<2xf64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! CHECK: %[[u:.*]] = urem i8 %[[i1]], 2 +! CHECK: %[[r:.*]] = extractelement <2 x double> %[[x]], i8 %[[u]] +! CHECK: store double %[[r]], ptr %{{[0-9]}}, align 8 + + r = vec_extract(x, i2) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i16 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i16] : vector<2xf64> +! 
CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i16) : i16 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<2xf64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2 +! CHECK: %[[u:.*]] = urem i16 %[[i2]], 2 +! CHECK: %[[r:.*]] = extractelement <2 x double> %[[x]], i16 %[[u]] +! CHECK: store double %[[r]], ptr %{{[0-9]}}, align 8 + + r = vec_extract(x, i4) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i32 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i32] : vector<2xf64> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i32) : i32 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<2xf64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4 +! CHECK: %[[u:.*]] = urem i32 %[[i4]], 2 +! CHECK: %[[r:.*]] = extractelement <2 x double> %[[x]], i32 %[[u]] +! CHECK: store double %[[r]], ptr %{{[0-9]}}, align 8 + + r = vec_extract(x, i8) +! 
CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i64 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i64] : vector<2xf64> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i64) : i64 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<2xf64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! CHECK: %[[u:.*]] = urem i64 %[[i8]], 2 +! CHECK: %[[r:.*]] = extractelement <2 x double> %[[x]], i64 %[[u]] +! CHECK: store double %[[r]], ptr %{{[0-9]}}, align 8 +end subroutine vec_extract_testf64 + +! CHECK-LABEL: vec_extract_testi8 +subroutine vec_extract_testi8(x, i1, i2, i4, i8) + vector(integer(1)) :: x + integer(1) :: r + integer(1) :: i1 + integer(2) :: i2 + integer(4) :: i4 + integer(8) :: i8 + r = vec_extract(x, i1) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: %[[c:.*]] = arith.constant 16 : i8 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i8] : vector<16xi8> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! 
CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(16 : i8) : i8 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<16xi8> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! CHECK: %[[u:.*]] = urem i8 %[[i1]], 16 +! CHECK: %[[r:.*]] = extractelement <16 x i8> %[[x]], i8 %[[u]] +! CHECK: store i8 %[[r]], ptr %{{[0-9]}}, align 1 + + r = vec_extract(x, i2) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: %[[c:.*]] = arith.constant 16 : i16 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i16] : vector<16xi8> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(16 : i16) : i16 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<16xi8> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2 +! CHECK: %[[u:.*]] = urem i16 %[[i2]], 16 +! CHECK: %[[r:.*]] = extractelement <16 x i8> %[[x]], i16 %[[u]] +! CHECK: store i8 %[[r]], ptr %{{[0-9]}}, align 1 + + r = vec_extract(x, i4) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! 
CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: %[[c:.*]] = arith.constant 16 : i32 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i32] : vector<16xi8> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(16 : i32) : i32 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<16xi8> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4 +! CHECK: %[[u:.*]] = urem i32 %[[i4]], 16 +! CHECK: %[[r:.*]] = extractelement <16 x i8> %[[x]], i32 %[[u]] +! CHECK: store i8 %[[r]], ptr %{{[0-9]}}, align 1 + + r = vec_extract(x, i8) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: %[[c:.*]] = arith.constant 16 : i64 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i64] : vector<16xi8> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(16 : i64) : i64 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<16xi8> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16 +! 
CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! CHECK: %[[u:.*]] = urem i64 %[[i8]], 16 +! CHECK: %[[r:.*]] = extractelement <16 x i8> %[[x]], i64 %[[u]] +! CHECK: store i8 %[[r]], ptr %{{[0-9]}}, align 1 +end subroutine vec_extract_testi8 + +! CHECK-LABEL: vec_extract_testi16 +subroutine vec_extract_testi16(x, i1, i2, i4, i8) + vector(integer(2)) :: x + integer(2) :: r + integer(1) :: i1 + integer(2) :: i2 + integer(4) :: i4 + integer(8) :: i8 + r = vec_extract(x, i1) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[c:.*]] = arith.constant 8 : i8 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i8] : vector<8xi16> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(8 : i8) : i8 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<8xi16> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! CHECK: %[[u:.*]] = urem i8 %[[i1]], 8 +! CHECK: %[[r:.*]] = extractelement <8 x i16> %[[x]], i8 %[[u]] +! CHECK: store i16 %[[r]], ptr %{{[0-9]}}, align 2 + + r = vec_extract(x, i2) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[c:.*]] = arith.constant 8 : i16 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! 
CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i16] : vector<8xi16> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(8 : i16) : i16 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<8xi16> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2 +! CHECK: %[[u:.*]] = urem i16 %[[i2]], 8 +! CHECK: %[[r:.*]] = extractelement <8 x i16> %[[x]], i16 %[[u]] +! CHECK: store i16 %[[r]], ptr %{{[0-9]}}, align 2 + + r = vec_extract(x, i4) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[c:.*]] = arith.constant 8 : i32 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i32] : vector<8xi16> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(8 : i32) : i32 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<8xi16> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4 +! CHECK: %[[u:.*]] = urem i32 %[[i4]], 8 +! CHECK: %[[r:.*]] = extractelement <8 x i16> %[[x]], i32 %[[u]] +! 
CHECK: store i16 %[[r]], ptr %{{[0-9]}}, align 2 + + r = vec_extract(x, i8) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[c:.*]] = arith.constant 8 : i64 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i64] : vector<8xi16> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(8 : i64) : i64 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<8xi16> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! CHECK: %[[u:.*]] = urem i64 %[[i8]], 8 +! CHECK: %[[r:.*]] = extractelement <8 x i16> %[[x]], i64 %[[u]] +! CHECK: store i16 %[[r]], ptr %{{[0-9]}}, align 2 +end subroutine vec_extract_testi16 + +! CHECK-LABEL: vec_extract_testi32 +subroutine vec_extract_testi32(x, i1, i2, i4, i8) + vector(integer(4)) :: x + integer(4) :: r + integer(1) :: i1 + integer(2) :: i2 + integer(4) :: i4 + integer(8) :: i8 + r = vec_extract(x, i1) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i8 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i8] : vector<4xi32> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! 
CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i8) : i8 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<4xi32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! CHECK: %[[u:.*]] = urem i8 %[[i1]], 4 +! CHECK: %[[r:.*]] = extractelement <4 x i32> %[[x]], i8 %[[u]] +! CHECK: store i32 %[[r]], ptr %{{[0-9]}}, align 4 + + r = vec_extract(x, i2) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i16 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i16] : vector<4xi32> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i16) : i16 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<4xi32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2 +! CHECK: %[[u:.*]] = urem i16 %[[i2]], 4 +! CHECK: %[[r:.*]] = extractelement <4 x i32> %[[x]], i16 %[[u]] +! CHECK: store i32 %[[r]], ptr %{{[0-9]}}, align 4 + + r = vec_extract(x, i4) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! 
CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i32 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i32] : vector<4xi32> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i32) : i32 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<4xi32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4 +! CHECK: %[[u:.*]] = urem i32 %[[i4]], 4 +! CHECK: %[[r:.*]] = extractelement <4 x i32> %[[x]], i32 %[[u]] +! CHECK: store i32 %[[r]], ptr %{{[0-9]}}, align 4 + + r = vec_extract(x, i8) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i64 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i64] : vector<4xi32> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i64) : i64 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<4xi32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16 +! 
CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! CHECK: %[[u:.*]] = urem i64 %[[i8]], 4 +! CHECK: %[[r:.*]] = extractelement <4 x i32> %[[x]], i64 %[[u]] +! CHECK: store i32 %[[r]], ptr %{{[0-9]}}, align 4 +end subroutine vec_extract_testi32 + +! CHECK-LABEL: vec_extract_testi64 +subroutine vec_extract_testi64(x, i1, i2, i4, i8) + vector(integer(8)) :: x + integer(8) :: r + integer(1) :: i1 + integer(2) :: i2 + integer(4) :: i4 + integer(8) :: i8 + r = vec_extract(x, i1) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i8 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i8] : vector<2xi64> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i8) : i8 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! CHECK: %[[u:.*]] = urem i8 %[[i1]], 2 +! CHECK: %[[r:.*]] = extractelement <2 x i64> %[[x]], i8 %[[u]] +! CHECK: store i64 %[[r]], ptr %{{[0-9]}}, align 8 + + r = vec_extract(x, i2) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i16 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! 
CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i16] : vector<2xi64> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i16) : i16 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2 +! CHECK: %[[u:.*]] = urem i16 %[[i2]], 2 +! CHECK: %[[r:.*]] = extractelement <2 x i64> %[[x]], i16 %[[u]] +! CHECK: store i64 %[[r]], ptr %{{[0-9]}}, align 8 + + r = vec_extract(x, i4) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i32 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i32] : vector<2xi64> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i32) : i32 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr + +! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4 +! CHECK: %[[u:.*]] = urem i32 %[[i4]], 2 +! CHECK: %[[r:.*]] = extractelement <2 x i64> %[[x]], i32 %[[u]] +! 
CHECK: store i64 %[[r]], ptr %{{[0-9]}}, align 8 + + r = vec_extract(x, i8) +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:i64>> +! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64> +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i64 +! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i64] : vector<2xi64> +! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<i64> + +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<2xi64>> +! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i64> +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i64) : i64 +! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<i64> + +! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! CHECK: %[[u:.*]] = urem i64 %[[i8]], 2 +! CHECK: %[[r:.*]] = extractelement <2 x i64> %[[x]], i64 %[[u]] +! CHECK: store i64 %[[r]], ptr %{{[0-9]}}, align 8 +end subroutine vec_extract_testi64 diff --git a/flang/test/Lower/PowerPC/ppc-vec-insert-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-insert-elem-order.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/PowerPC/ppc-vec-insert-elem-order.f90 @@ -0,0 +1,61 @@ +! RUN: bbc -emit-fir -fno-ppc-native-vector-element-order %s -o - | FileCheck --check-prefixes="FIR" %s +! RUN: %flang_fc1 -emit-llvm -fno-ppc-native-vector-element-order %s -o - | FileCheck --check-prefixes="LLVMIR" %s +! REQUIRES: target=powerpc{{.*}} + +!CHECK-LABEL: vec_insert_testf32i64 +subroutine vec_insert_testf32i64(v, x, i8) + real(4) :: v + vector(real(4)) :: x + vector(real(4)) :: r + integer(8) :: i8 + r = vec_insert(v, x, i8) + +! FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<f32> +!
FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:f32>> +! FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64> +! FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32> +! FIR: %[[c:.*]] = arith.constant 4 : i64 +! FIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! FIR: %[[c3:.*]] = arith.constant 3 : i64 +! FIR: %[[sub:.*]] = llvm.sub %[[c3]], %[[urem]] : i64 +! FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[sub]] : i64] : vector<4xf32> +! FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>> + +! LLVMIR: %[[v:.*]] = load float, ptr %{{[0-9]}}, align 4 +! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16 +! LLVMIR: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! LLVMIR: %[[urem:.*]] = urem i64 %[[i8]], 4 +! LLVMIR: %[[sub:.*]] = sub i64 3, %[[urem]] +! LLVMIR: %[[r:.*]] = insertelement <4 x float> %[[x]], float %[[v]], i64 %[[sub]] +! LLVMIR: store <4 x float> %[[r]], ptr %{{[0-9]}}, align 16 +end subroutine vec_insert_testf32i64 + +!CHECK-LABEL: vec_insert_testi64i8 +subroutine vec_insert_testi64i8(v, x, i1, i2, i4, i8) + integer(8) :: v + vector(integer(8)) :: x + vector(integer(8)) :: r + integer(1) :: i1 + r = vec_insert(v, x, i1) + +! FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64> +! FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:i64>> +! FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8> +! FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64> +! FIR: %[[c:.*]] = arith.constant 2 : i8 +! FIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! FIR: %[[c1:.*]] = arith.constant 1 : i8 +! FIR: %[[sub:.*]] = llvm.sub %[[c1]], %[[urem]] : i8 +! FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[sub]] : i8] : vector<2xi64> +! FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64> +! FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:i64>> + +!
LLVMIR: %[[v:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16 +! LLVMIR: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! LLVMIR: %[[urem:.*]] = urem i8 %[[i1]], 2 +! LLVMIR: %[[sub:.*]] = sub i8 1, %[[urem]] +! LLVMIR: %[[r:.*]] = insertelement <2 x i64> %[[x]], i64 %[[v]], i8 %[[sub]] +! LLVMIR: store <2 x i64> %[[r]], ptr %{{[0-9]}}, align 16 +end subroutine vec_insert_testi64i8 diff --git a/flang/test/Lower/PowerPC/ppc-vec-insert.f90 b/flang/test/Lower/PowerPC/ppc-vec-insert.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/PowerPC/ppc-vec-insert.f90 @@ -0,0 +1,697 @@ +! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s +! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s +! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s +! REQUIRES: target=powerpc{{.*}} + +! vec_insert + +!CHECK-LABEL: vec_insert_testf32 +subroutine vec_insert_testf32(v, x, i1, i2, i4, i8) + real(4) :: v + vector(real(4)) :: x + vector(real(4)) :: r + integer(1) :: i1 + integer(2) :: i2 + integer(4) :: i4 + integer(8) :: i8 + r = vec_insert(v, x, i1) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<f32> +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:f32>> +! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8> +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i8 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i8] : vector<4xf32> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xf32>) -> !fir.vector<4:f32> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<f32> +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<4xf32>> +!
CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i8) : i8 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i8] : vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load float, ptr %{{[0-9]}}, align 4 +! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! CHECK: %[[urem:.*]] = urem i8 %[[i1]], 4 +! CHECK: %[[r:.*]] = insertelement <4 x float> %[[x]], float %[[v]], i8 %[[urem]] +! CHECK: store <4 x float> %[[r]], ptr %{{[0-9]}}, align 16 + + + r = vec_insert(v, x, i2) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i16 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i16] : vector<4xf32> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xf32>) -> !fir.vector<4:f32> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i16) : i16 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i16] : vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load float, ptr %{{[0-9]}}, align 4 +! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16 +! 
CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2 +! CHECK: %[[urem:.*]] = urem i16 %[[i2]], 4 +! CHECK: %[[r:.*]] = insertelement <4 x float> %[[x]], float %[[v]], i16 %[[urem]] +! CHECK: store <4 x float> %[[r]], ptr %{{[0-9]}}, align 16 + + r = vec_insert(v, x, i4) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i32 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i32] : vector<4xf32> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xf32>) -> !fir.vector<4:f32> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i32) : i32 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i32] : vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load float, ptr %{{[0-9]}}, align 4 +! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4 +! CHECK: %[[urem:.*]] = urem i32 %[[i4]], 4 +! CHECK: %[[r:.*]] = insertelement <4 x float> %[[x]], float %[[v]], i32 %[[urem]] +! CHECK: store <4 x float> %[[r]], ptr %{{[0-9]}}, align 16 + + r = vec_insert(v, x, i8) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! 
CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i64 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i64] : vector<4xf32> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xf32>) -> !fir.vector<4:f32> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i64) : i64 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i64] : vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load float, ptr %{{[0-9]}}, align 4 +! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! CHECK: %[[urem:.*]] = urem i64 %[[i8]], 4 +! CHECK: %[[r:.*]] = insertelement <4 x float> %[[x]], float %[[v]], i64 %[[urem]] +! CHECK: store <4 x float> %[[r]], ptr %{{[0-9]}}, align 16 +end subroutine vec_insert_testf32 + +!CHECK-LABEL: vec_insert_testf64 +subroutine vec_insert_testf64(v, x, i1, i2, i4, i8) + real(8) :: v + vector(real(8)) :: x + vector(real(8)) :: r + integer(1) :: i1 + integer(2) :: i2 + integer(4) :: i4 + integer(8) :: i8 + r = vec_insert(v, x, i1) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i8 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! 
CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i8] : vector<2xf64> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i8) : i8 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i8] : vector<2xf64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load double, ptr %{{[0-9]}}, align 8 +! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! CHECK: %[[urem:.*]] = urem i8 %[[i1]], 2 +! CHECK: %[[r:.*]] = insertelement <2 x double> %[[x]], double %[[v]], i8 %[[urem]] +! CHECK: store <2 x double> %[[r]], ptr %{{[0-9]}}, align 16 + + + r = vec_insert(v, x, i2) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i16 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i16] : vector<2xf64> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! 
CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i16) : i16 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i16] : vector<2xf64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load double, ptr %{{[0-9]}}, align 8 +! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2 +! CHECK: %[[urem:.*]] = urem i16 %[[i2]], 2 +! CHECK: %[[r:.*]] = insertelement <2 x double> %[[x]], double %[[v]], i16 %[[urem]] +! CHECK: store <2 x double> %[[r]], ptr %{{[0-9]}}, align 16 + + r = vec_insert(v, x, i4) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i32 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i32] : vector<2xf64> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i32) : i32 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i32] : vector<2xf64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load double, ptr %{{[0-9]}}, align 8 +! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4 +! 
CHECK: %[[urem:.*]] = urem i32 %[[i4]], 2 +! CHECK: %[[r:.*]] = insertelement <2 x double> %[[x]], double %[[v]], i32 %[[urem]] +! CHECK: store <2 x double> %[[r]], ptr %{{[0-9]}}, align 16 + + r = vec_insert(v, x, i8) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i64 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i64] : vector<2xf64> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i64) : i64 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i64] : vector<2xf64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load double, ptr %{{[0-9]}}, align 8 +! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! CHECK: %[[urem:.*]] = urem i64 %[[i8]], 2 +! CHECK: %[[r:.*]] = insertelement <2 x double> %[[x]], double %[[v]], i64 %[[urem]] +! 
CHECK: store <2 x double> %[[r]], ptr %{{[0-9]}}, align 16 +end subroutine vec_insert_testf64 + +!CHECK-LABEL: vec_insert_testi8 +subroutine vec_insert_testi8(v, x, i1, i2, i4, i8) + integer(1) :: v + vector(integer(1)) :: x + vector(integer(1)) :: r + integer(1) :: i1 + integer(2) :: i2 + integer(4) :: i4 + integer(8) :: i8 + r = vec_insert(v, x, i1) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: %[[c:.*]] = arith.constant 16 : i8 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i8] : vector<16xi8> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(16 : i8) : i8 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i8] : vector<16xi8> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! CHECK: %[[urem:.*]] = urem i8 %[[i1]], 16 +! CHECK: %[[r:.*]] = insertelement <16 x i8> %[[x]], i8 %[[v]], i8 %[[urem]] +! CHECK: store <16 x i8> %[[r]], ptr %{{[0-9]}}, align 16 + + + r = vec_insert(v, x, i2) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! 
CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: %[[c:.*]] = arith.constant 16 : i16 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i16] : vector<16xi8> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(16 : i16) : i16 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i16] : vector<16xi8> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2 +! CHECK: %[[urem:.*]] = urem i16 %[[i2]], 16 +! CHECK: %[[r:.*]] = insertelement <16 x i8> %[[x]], i8 %[[v]], i16 %[[urem]] +! CHECK: store <16 x i8> %[[r]], ptr %{{[0-9]}}, align 16 + + r = vec_insert(v, x, i4) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: %[[c:.*]] = arith.constant 16 : i32 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i32] : vector<16xi8> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8> +! 
CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(16 : i32) : i32 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i32] : vector<16xi8> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4 +! CHECK: %[[urem:.*]] = urem i32 %[[i4]], 16 +! CHECK: %[[r:.*]] = insertelement <16 x i8> %[[x]], i8 %[[v]], i32 %[[urem]] +! CHECK: store <16 x i8> %[[r]], ptr %{{[0-9]}}, align 16 + + r = vec_insert(v, x, i8) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: %[[c:.*]] = arith.constant 16 : i64 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i64] : vector<16xi8> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(16 : i64) : i64 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i64] : vector<16xi8> +! 
CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! CHECK: %[[urem:.*]] = urem i64 %[[i8]], 16 +! CHECK: %[[r:.*]] = insertelement <16 x i8> %[[x]], i8 %[[v]], i64 %[[urem]] +! CHECK: store <16 x i8> %[[r]], ptr %{{[0-9]}}, align 16 +end subroutine vec_insert_testi8 + +!CHECK-LABEL: vec_insert_testi16 +subroutine vec_insert_testi16(v, x, i1, i2, i4, i8) + integer(2) :: v + vector(integer(2)) :: x + vector(integer(2)) :: r + integer(1) :: i1 + integer(2) :: i2 + integer(4) :: i4 + integer(8) :: i8 + r = vec_insert(v, x, i1) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[c:.*]] = arith.constant 8 : i8 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i8] : vector<8xi16> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:i16> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(8 : i8) : i8 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i8] : vector<8xi16> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load i16, ptr %{{[0-9]}}, align 2 +! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! 
CHECK: %[[urem:.*]] = urem i8 %[[i1]], 8 +! CHECK: %[[r:.*]] = insertelement <8 x i16> %[[x]], i16 %[[v]], i8 %[[urem]] +! CHECK: store <8 x i16> %[[r]], ptr %{{[0-9]}}, align 16 + + + r = vec_insert(v, x, i2) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[c:.*]] = arith.constant 8 : i16 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i16] : vector<8xi16> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:i16> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(8 : i16) : i16 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i16] : vector<8xi16> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load i16, ptr %{{[0-9]}}, align 2 +! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2 +! CHECK: %[[urem:.*]] = urem i16 %[[i2]], 8 +! CHECK: %[[r:.*]] = insertelement <8 x i16> %[[x]], i16 %[[v]], i16 %[[urem]] +! CHECK: store <8 x i16> %[[r]], ptr %{{[0-9]}}, align 16 + + r = vec_insert(v, x, i4) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16> +! 
CHECK-FIR: %[[c:.*]] = arith.constant 8 : i32 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i32] : vector<8xi16> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:i16> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(8 : i32) : i32 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i32] : vector<8xi16> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load i16, ptr %{{[0-9]}}, align 2 +! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4 +! CHECK: %[[urem:.*]] = urem i32 %[[i4]], 8 +! CHECK: %[[r:.*]] = insertelement <8 x i16> %[[x]], i16 %[[v]], i32 %[[urem]] +! CHECK: store <8 x i16> %[[r]], ptr %{{[0-9]}}, align 16 + + r = vec_insert(v, x, i8) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[c:.*]] = arith.constant 8 : i64 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i64] : vector<8xi16> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:i16> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! 
CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(8 : i64) : i64 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i64] : vector<8xi16> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load i16, ptr %{{[0-9]}}, align 2 +! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! CHECK: %[[urem:.*]] = urem i64 %[[i8]], 8 +! CHECK: %[[r:.*]] = insertelement <8 x i16> %[[x]], i16 %[[v]], i64 %[[urem]] +! CHECK: store <8 x i16> %[[r]], ptr %{{[0-9]}}, align 16 +end subroutine vec_insert_testi16 + +!CHECK-LABEL: vec_insert_testi32 +subroutine vec_insert_testi32(v, x, i1, i2, i4, i8) + integer(4) :: v + vector(integer(4)) :: x + vector(integer(4)) :: r + integer(1) :: i1 + integer(2) :: i2 + integer(4) :: i4 + integer(8) :: i8 + r = vec_insert(v, x, i1) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i8 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i8] : vector<4xi32> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i8) : i8 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! 
CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i8] : vector<4xi32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load i32, ptr %{{[0-9]}}, align 4 +! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! CHECK: %[[urem:.*]] = urem i8 %[[i1]], 4 +! CHECK: %[[r:.*]] = insertelement <4 x i32> %[[x]], i32 %[[v]], i8 %[[urem]] +! CHECK: store <4 x i32> %[[r]], ptr %{{[0-9]}}, align 16 + + + r = vec_insert(v, x, i2) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i16 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i16] : vector<4xi32> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i16) : i16 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i16] : vector<4xi32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load i32, ptr %{{[0-9]}}, align 4 +! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2 +! CHECK: %[[urem:.*]] = urem i16 %[[i2]], 4 +! CHECK: %[[r:.*]] = insertelement <4 x i32> %[[x]], i32 %[[v]], i16 %[[urem]] +! 
CHECK: store <4 x i32> %[[r]], ptr %{{[0-9]}}, align 16 + + r = vec_insert(v, x, i4) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i32 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i32] : vector<4xi32> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i32) : i32 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i32] : vector<4xi32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load i32, ptr %{{[0-9]}}, align 4 +! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4 +! CHECK: %[[urem:.*]] = urem i32 %[[i4]], 4 +! CHECK: %[[r:.*]] = insertelement <4 x i32> %[[x]], i32 %[[v]], i32 %[[urem]] +! CHECK: store <4 x i32> %[[r]], ptr %{{[0-9]}}, align 16 + + r = vec_insert(v, x, i8) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i64 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! 
CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i64] : vector<4xi32> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i64) : i64 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i64] : vector<4xi32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load i32, ptr %{{[0-9]}}, align 4 +! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! CHECK: %[[urem:.*]] = urem i64 %[[i8]], 4 +! CHECK: %[[r:.*]] = insertelement <4 x i32> %[[x]], i32 %[[v]], i64 %[[urem]] +! CHECK: store <4 x i32> %[[r]], ptr %{{[0-9]}}, align 16 +end subroutine vec_insert_testi32 + + +!CHECK-LABEL: vec_insert_testi64 +subroutine vec_insert_testi64(v, x, i1, i2, i4, i8) + integer(8) :: v + vector(integer(8)) :: x + vector(integer(8)) :: r + integer(1) :: i1 + integer(2) :: i2 + integer(4) :: i4 + integer(8) :: i8 + r = vec_insert(v, x, i1) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i8 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i8] : vector<2xi64> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64> +! 
CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i8) : i8 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i8] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1 +! CHECK: %[[urem:.*]] = urem i8 %[[i1]], 2 +! CHECK: %[[r:.*]] = insertelement <2 x i64> %[[x]], i64 %[[v]], i8 %[[urem]] +! CHECK: store <2 x i64> %[[r]], ptr %{{[0-9]}}, align 16 + + + r = vec_insert(v, x, i2) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i16 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i16] : vector<2xi64> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i16) : i16 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i16] : vector<2xi64> +! 
CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2 +! CHECK: %[[urem:.*]] = urem i16 %[[i2]], 2 +! CHECK: %[[r:.*]] = insertelement <2 x i64> %[[x]], i64 %[[v]], i16 %[[urem]] +! CHECK: store <2 x i64> %[[r]], ptr %{{[0-9]}}, align 16 + + r = vec_insert(v, x, i4) +! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i32 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i32] : vector<2xi64> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i32) : i32 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i32] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4 +! CHECK: %[[urem:.*]] = urem i32 %[[i4]], 2 +! CHECK: %[[r:.*]] = insertelement <2 x i64> %[[x]], i64 %[[v]], i32 %[[urem]] +! CHECK: store <2 x i64> %[[r]], ptr %{{[0-9]}}, align 16 + + r = vec_insert(v, x, i8) +! 
CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref> +! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref +! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i64 +! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i64] : vector<2xi64> +! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64> +! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref> + +! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr> +! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr +! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i64) : i64 +! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64 +! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i64] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr> + +! CHECK: %[[v:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16 +! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8 +! CHECK: %[[urem:.*]] = urem i64 %[[i8]], 2 +! CHECK: %[[r:.*]] = insertelement <2 x i64> %[[x]], i64 %[[v]], i64 %[[urem]] +! CHECK: store <2 x i64> %[[r]], ptr %{{[0-9]}}, align 16 +end subroutine vec_insert_testi64 diff --git a/flang/test/Lower/PowerPC/ppc-vec-merge-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-merge-elem-order.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/PowerPC/ppc-vec-merge-elem-order.f90 @@ -0,0 +1,49 @@ +! RUN: bbc -emit-fir %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="FIR" %s +! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="LLVMIR" %s +! 
REQUIRES: target=powerpc{{.*}} + +!----------------- +! vec_mergeh +!----------------- + +! CHECK-LABEL: vec_mergeh_test_i4 +subroutine vec_mergeh_test_i4(arg1, arg2) + vector(integer(4)) :: arg1, arg2, r + r = vec_mergeh(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32> +! FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32> +! FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [6, 2, 7, 3] : vector<4xi32>, vector<4xi32> +! FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! LLVMIR: %[[r:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> %[[arg2]], <4 x i32> +! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergeh_test_i4 + +!----------------- +! vec_mergel +!----------------- + +! CHECK-LABEL: vec_mergel_test_r8 +subroutine vec_mergel_test_r8(arg1, arg2) + vector(real(8)) :: arg1, arg2, r + r = vec_mergel(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64> +! FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64> +! FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [2, 0] : vector<2xf64>, vector<2xf64> +! FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64> +! FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! LLVMIR: %[[r:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> +! 
LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergel_test_r8 diff --git a/flang/test/Lower/PowerPC/ppc-vec-merge.f90 b/flang/test/Lower/PowerPC/ppc-vec-merge.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/PowerPC/ppc-vec-merge.f90 @@ -0,0 +1,492 @@ +! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s +! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s +! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s +! REQUIRES: target=powerpc{{.*}} + +!------------ +! vec_mergeh +!------------ + + ! CHECK-LABEL: vec_mergeh_test_i1 +subroutine vec_mergeh_test_i1(arg1, arg2) + vector(integer(1)) :: arg1, arg2, r + r = vec_mergeh(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] : vector<16xi8>, vector<16xi8> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] : vector<16xi8> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> +! 
CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergeh_test_i1 + +! CHECK-LABEL: vec_mergeh_test_i2 +subroutine vec_mergeh_test_i2(arg1, arg2) + vector(integer(2)) :: arg1, arg2, r + r = vec_mergeh(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 8, 1, 9, 2, 10, 3, 11] : vector<8xi16>, vector<8xi16> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:i16> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 8, 1, 9, 2, 10, 3, 11] : vector<8xi16> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <8 x i16> %[[arg1]], <8 x i16> %[[arg2]], <8 x i32> +! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergeh_test_i2 + +! CHECK-LABEL: vec_mergeh_test_i4 +subroutine vec_mergeh_test_i4(arg1, arg2) + vector(integer(4)) :: arg1, arg2, r + r = vec_mergeh(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 4, 1, 5] : vector<4xi32>, vector<4xi32> +! 
CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 4, 1, 5] : vector<4xi32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> %[[arg2]], <4 x i32> +! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergeh_test_i4 + +! CHECK-LABEL: vec_mergeh_test_i8 +subroutine vec_mergeh_test_i8(arg1, arg2) + vector(integer(8)) :: arg1, arg2, r + r = vec_mergeh(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 2] : vector<2xi64>, vector<2xi64> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 2] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> +! 
CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergeh_test_i8 + +! CHECK-LABEL: vec_mergeh_test_u1 +subroutine vec_mergeh_test_u1(arg1, arg2) + vector(unsigned(1)) :: arg1, arg2, r + r = vec_mergeh(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] : vector<16xi8>, vector<16xi8> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] : vector<16xi8> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> +! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergeh_test_u1 + +! CHECK-LABEL: vec_mergeh_test_u2 +subroutine vec_mergeh_test_u2(arg1, arg2) + vector(unsigned(2)) :: arg1, arg2, r + r = vec_mergeh(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16> +! 
CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 8, 1, 9, 2, 10, 3, 11] : vector<8xi16>, vector<8xi16> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:ui16> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 8, 1, 9, 2, 10, 3, 11] : vector<8xi16> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <8 x i16> %[[arg1]], <8 x i16> %[[arg2]], <8 x i32> +! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergeh_test_u2 + +! CHECK-LABEL: vec_mergeh_test_u4 +subroutine vec_mergeh_test_u4(arg1, arg2) + vector(unsigned(4)) :: arg1, arg2, r + r = vec_mergeh(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 4, 1, 5] : vector<4xi32>, vector<4xi32> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:ui32> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 4, 1, 5] : vector<4xi32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! 
CHECK: %[[r:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> %[[arg2]], <4 x i32> +! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergeh_test_u4 + +! CHECK-LABEL: vec_mergeh_test_u8 +subroutine vec_mergeh_test_u8(arg1, arg2) + vector(unsigned(8)) :: arg1, arg2, r + r = vec_mergeh(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 2] : vector<2xi64>, vector<2xi64> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:ui64> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 2] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> +! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergeh_test_u8 + +! CHECK-LABEL: vec_mergeh_test_r4 +subroutine vec_mergeh_test_r4(arg1, arg2) + vector(real(4)) :: arg1, arg2, r + r = vec_mergeh(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32> +! 
CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 4, 1, 5] : vector<4xf32>, vector<4xf32> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<4xf32>) -> !fir.vector<4:f32> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 4, 1, 5] : vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <4 x float> %[[arg1]], <4 x float> %[[arg2]], <4 x i32> +! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergeh_test_r4 + +! CHECK-LABEL: vec_mergeh_test_r8 +subroutine vec_mergeh_test_r8(arg1, arg2) + vector(real(8)) :: arg1, arg2, r + r = vec_mergeh(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 2] : vector<2xf64>, vector<2xf64> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 2] : vector<2xf64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! 
CHECK: %[[r:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> +! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergeh_test_r8 + +!------------ +! vec_mergel +!------------ + +! CHECK-LABEL: vec_mergel_test_i1 +subroutine vec_mergel_test_i1(arg1, arg2) + vector(integer(1)) :: arg1, arg2, r + r = vec_mergel(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] : vector<16xi8>, vector<16xi8> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] : vector<16xi8> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> +! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergel_test_i1 + +! CHECK-LABEL: vec_mergel_test_i2 +subroutine vec_mergel_test_i2(arg1, arg2) + vector(integer(2)) :: arg1, arg2, r + r = vec_mergel(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16> +! 
CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [4, 12, 5, 13, 6, 14, 7, 15] : vector<8xi16>, vector<8xi16> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:i16> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [4, 12, 5, 13, 6, 14, 7, 15] : vector<8xi16> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <8 x i16> %[[arg1]], <8 x i16> %[[arg2]], <8 x i32> +! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergel_test_i2 + +! CHECK-LABEL: vec_mergel_test_i4 +subroutine vec_mergel_test_i4(arg1, arg2) + vector(integer(4)) :: arg1, arg2, r + r = vec_mergel(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [2, 6, 3, 7] : vector<4xi32>, vector<4xi32> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [2, 6, 3, 7] : vector<4xi32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! 
CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> %[[arg2]], <4 x i32> +! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergel_test_i4 + +! CHECK-LABEL: vec_mergel_test_i8 +subroutine vec_mergel_test_i8(arg1, arg2) + vector(integer(8)) :: arg1, arg2, r + r = vec_mergel(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [1, 3] : vector<2xi64>, vector<2xi64> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 3] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> +! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergel_test_i8 + +! CHECK-LABEL: vec_mergel_test_u1 +subroutine vec_mergel_test_u1(arg1, arg2) + vector(unsigned(1)) :: arg1, arg2, r + r = vec_mergel(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! 
CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] : vector<16xi8>, vector<16xi8> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] : vector<16xi8> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> +! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergel_test_u1 + +! CHECK-LABEL: vec_mergel_test_u2 +subroutine vec_mergel_test_u2(arg1, arg2) + vector(unsigned(2)) :: arg1, arg2, r + r = vec_mergel(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [4, 12, 5, 13, 6, 14, 7, 15] : vector<8xi16>, vector<8xi16> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:ui16> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [4, 12, 5, 13, 6, 14, 7, 15] : vector<8xi16> +! 
CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <8 x i16> %[[arg1]], <8 x i16> %[[arg2]], <8 x i32> +! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergel_test_u2 + +! CHECK-LABEL: vec_mergel_test_u4 +subroutine vec_mergel_test_u4(arg1, arg2) + vector(unsigned(4)) :: arg1, arg2, r + r = vec_mergel(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [2, 6, 3, 7] : vector<4xi32>, vector<4xi32> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:ui32> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [2, 6, 3, 7] : vector<4xi32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> %[[arg2]], <4 x i32> +! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergel_test_u4 + +! CHECK-LABEL: vec_mergel_test_u8 +subroutine vec_mergel_test_u8(arg1, arg2) + vector(unsigned(8)) :: arg1, arg2, r + r = vec_mergel(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! 
CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [1, 3] : vector<2xi64>, vector<2xi64> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:ui64> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 3] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> +! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergel_test_u8 + +! CHECK-LABEL: vec_mergel_test_r4 +subroutine vec_mergel_test_r4(arg1, arg2) + vector(real(4)) :: arg1, arg2, r + r = vec_mergel(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [2, 6, 3, 7] : vector<4xf32>, vector<4xf32> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<4xf32>) -> !fir.vector<4:f32> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [2, 6, 3, 7] : vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! 
CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <4 x float> %[[arg1]], <4 x float> %[[arg2]], <4 x i32> +! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergel_test_r4 + +! CHECK-LABEL: vec_mergel_test_r8 +subroutine vec_mergel_test_r8(arg1, arg2) + vector(real(8)) :: arg1, arg2, r + r = vec_mergel(arg1, arg2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [1, 3] : vector<2xf64>, vector<2xf64> +! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64> +! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 3] : vector<2xf64> +! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> +! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_mergel_test_r8 diff --git a/flang/test/Lower/PowerPC/ppc-vec-perm-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-perm-elem-order.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/PowerPC/ppc-vec-perm-elem-order.f90 @@ -0,0 +1,60 @@ +! RUN: bbc -emit-fir %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="FIR" %s +! 
RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="LLVMIR" %s +! REQUIRES: target=powerpc{{.*}} + +!---------------- +! vec_perm +!---------------- + +! CHECK-LABEL: vec_perm_test_i1 +subroutine vec_perm_test_i1(arg1, arg2, arg3) + vector(integer(1)) :: arg1, arg2, r + vector(unsigned(1)) :: arg3 + r = vec_perm(arg1, arg2, arg3) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8> +! FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8> +! FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<16xi8> to vector<4xi32> +! FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<16xi8> to vector<4xi32> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg1]], %[[barg2]], %[[carg3]]) fastmath : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32> +! FIR: %[[vcall:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32> +! FIR: %[[bcall:.*]] = llvm.bitcast %[[vcall]] : vector<4xi32> to vector<16xi8> +! FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<16xi8>) -> !fir.vector<16:i8> +! FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! LLVMIR: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! LLVMIR: %[[barg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32> +! LLVMIR: %[[barg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32> +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg1]], <4 x i32> %[[barg2]], <16 x i8> %[[arg3]]) +! LLVMIR: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <16 x i8> +! 
LLVMIR: store <16 x i8> %[[bcall]], ptr %{{.*}}, align 16 +end subroutine vec_perm_test_i1 + +!---------------- +! vec_permi +!---------------- + +! CHECK-LABEL: vec_permi_test_i8i2 +subroutine vec_permi_test_i8i2(arg1, arg2, arg3) + vector(integer(8)) :: arg1, arg2, r + r = vec_permi(arg1, arg2, 2_2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64> +! FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64> +! FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [3, 0] : vector<2xi64>, vector<2xi64> +! FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:i64> +! FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! LLVMIR: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> +! LLVMIR: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16 +end subroutine vec_permi_test_i8i2 diff --git a/flang/test/Lower/PowerPC/ppc-vec-perm.f90 b/flang/test/Lower/PowerPC/ppc-vec-perm.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/PowerPC/ppc-vec-perm.f90 @@ -0,0 +1,872 @@ +! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s +! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s +! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s +! REQUIRES: target=powerpc{{.*}} + +! CHECK-LABEL: vec_perm_test_i1 +subroutine vec_perm_test_i1(arg1, arg2, arg3) + vector(integer(1)) :: arg1, arg2, r + vector(unsigned(1)) :: arg3 + r = vec_perm(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! 
CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8> +! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<16xi8> to vector<4xi32> +! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<16xi8> to vector<4xi32> +! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8> +! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) fastmath : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32> +! CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[bcall:.*]] = llvm.bitcast %[[call2]] : vector<4xi32> to vector<16xi8> +! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<16xi8>) -> !fir.vector<16:i8> +! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32> +! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32> +! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8 +! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]] : vector<16xi8> +! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32> +! 
CHECK-LLVMIR: %[[bcall:.*]] = llvm.bitcast %[[call]] : vector<4xi32> to vector<16xi8> +! CHECK-LLVMIR: llvm.store %[[bcall]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[barg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32> +! CHECK: %[[barg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32> +! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], +! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]]) +! CHECK: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <16 x i8> +! CHECK: store <16 x i8> %[[bcall]], ptr %{{.*}}, align 16 +end subroutine vec_perm_test_i1 + +! CHECK-LABEL: vec_perm_test_i2 +subroutine vec_perm_test_i2(arg1, arg2, arg3) + vector(integer(2)) :: arg1, arg2, r + vector(unsigned(1)) :: arg3 + r = vec_perm(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16> +! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<4xi32> +! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<4xi32> +! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8> +! 
CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) fastmath : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32> +! CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[bcall:.*]] = llvm.bitcast %[[call2]] : vector<4xi32> to vector<8xi16> +! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<8xi16>) -> !fir.vector<8:i16> +! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32> +! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32> +! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8 +! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]] : vector<16xi8> +! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32> +! CHECK-LLVMIR: %[[bcall:.*]] = llvm.bitcast %[[call]] : vector<4xi32> to vector<8xi16> +! CHECK-LLVMIR: llvm.store %[[bcall]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32> +! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32> +! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], +! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]]) +! 
CHECK: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <8 x i16> +! CHECK: store <8 x i16> %[[bcall]], ptr %{{.*}}, align 16 +end subroutine vec_perm_test_i2 + +! CHECK-LABEL: vec_perm_test_i4 +subroutine vec_perm_test_i4(arg1, arg2, arg3) + vector(integer(4)) :: arg1, arg2, r + vector(unsigned(1)) :: arg3 + r = vec_perm(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8> +! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[carg2]], %[[carg1]], %[[xor]]) fastmath : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32> +! CHECK-FIR: fir.store %[[call]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8 +! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]] : vector<16xi8> +! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[arg2]], %[[arg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32> +! CHECK-LLVMIR: llvm.store %[[call]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! 
CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], +! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]], <16 x i8> %[[xor]]) +! CHECK: store <4 x i32> %[[call]], ptr %{{.*}}, align 16 +end subroutine vec_perm_test_i4 + +! CHECK-LABEL: vec_perm_test_i8 +subroutine vec_perm_test_i8(arg1, arg2, arg3) + vector(integer(8)) :: arg1, arg2, r + vector(unsigned(1)) :: arg3 + r = vec_perm(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<4xi32> +! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<4xi32> +! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8> +! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) fastmath : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32> +! CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[bcall:.*]] = llvm.bitcast %[[call2]] : vector<4xi32> to vector<2xi64> +! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<2xi64>) -> !fir.vector<2:i64> +! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! 
CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<2xi64> to vector<4xi32> +! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<2xi64> to vector<4xi32> +! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8 +! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]] : vector<16xi8> +! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32> +! CHECK-LLVMIR: %[[bcall:.*]] = llvm.bitcast %[[call]] : vector<4xi32> to vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[bcall]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <4 x i32> +! CHECK: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <4 x i32> +! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], +! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]]) +! CHECK: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <2 x i64> +! CHECK: store <2 x i64> %[[bcall]], ptr %{{.*}}, align 16 +end subroutine vec_perm_test_i8 + +! CHECK-LABEL: vec_perm_test_u1 +subroutine vec_perm_test_u1(arg1, arg2, arg3) + vector(unsigned(1)) :: arg1, arg2, r + vector(unsigned(1)) :: arg3 + r = vec_perm(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! 
CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<16xi8> to vector<4xi32> +! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<16xi8> to vector<4xi32> +! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8> +! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) fastmath : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32> +! CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[bcall:.*]] = llvm.bitcast %[[call2]] : vector<4xi32> to vector<16xi8> +! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<16xi8>) -> !fir.vector<16:ui8> +! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32> +! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32> +! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8 +! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]] : vector<16xi8> +! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32> +! 
CHECK-LLVMIR: %[[bcall:.*]] = llvm.bitcast %[[call]] : vector<4xi32> to vector<16xi8> +! CHECK-LLVMIR: llvm.store %[[bcall]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[barg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32> +! CHECK: %[[barg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32> +! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], +! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]]) +! CHECK: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <16 x i8> +! CHECK: store <16 x i8> %[[bcall]], ptr %{{.*}}, align 16 +end subroutine vec_perm_test_u1 + +! CHECK-LABEL: vec_perm_test_u2 +subroutine vec_perm_test_u2(arg1, arg2, arg3) + vector(unsigned(2)) :: arg1, arg2, r + vector(unsigned(1)) :: arg3 + r = vec_perm(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16> +! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<4xi32> +! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<4xi32> +! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8> +! 
CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) fastmath : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32> +! CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[bcall:.*]] = llvm.bitcast %[[call2]] : vector<4xi32> to vector<8xi16> +! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<8xi16>) -> !fir.vector<8:ui16> +! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32> +! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32> +! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8 +! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]] : vector<16xi8> +! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32> +! CHECK-LLVMIR: %[[bcall:.*]] = llvm.bitcast %[[call]] : vector<4xi32> to vector<8xi16> +! CHECK-LLVMIR: llvm.store %[[bcall]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32> +! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32> +! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], +! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]]) +! 
CHECK: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <8 x i16> +! CHECK: store <8 x i16> %[[bcall]], ptr %{{.*}}, align 16 +end subroutine vec_perm_test_u2 + +! CHECK-LABEL: vec_perm_test_u4 +subroutine vec_perm_test_u4(arg1, arg2, arg3) + vector(unsigned(4)) :: arg1, arg2, r + vector(unsigned(1)) :: arg3 + r = vec_perm(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32> +! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8> +! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[carg2]], %[[carg1]], %[[xor]]) fastmath : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32> +! CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[call2]] : (vector<4xi32>) -> !fir.vector<4:ui32> +! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8 +! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]] : vector<16xi8> +! 
CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[arg2]], %[[arg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32> +! CHECK-LLVMIR: llvm.store %[[call]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], +! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]], <16 x i8> %[[xor]]) +! CHECK: store <4 x i32> %[[call]], ptr %{{.*}}, align 16 +end subroutine vec_perm_test_u4 + +! CHECK-LABEL: vec_perm_test_u8 +subroutine vec_perm_test_u8(arg1, arg2, arg3) + vector(unsigned(8)) :: arg1, arg2, r + vector(unsigned(1)) :: arg3 + r = vec_perm(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<4xi32> +! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<4xi32> +! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8> +! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) fastmath : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32> +! 
CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[bcall:.*]] = llvm.bitcast %[[call2]] : vector<4xi32> to vector<2xi64> +! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<2xi64>) -> !fir.vector<2:ui64> +! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<2xi64> to vector<4xi32> +! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<2xi64> to vector<4xi32> +! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8 +! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]] : vector<16xi8> +! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32> +! CHECK-LLVMIR: %[[bcall:.*]] = llvm.bitcast %[[call]] : vector<4xi32> to vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[bcall]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <4 x i32> +! CHECK: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <4 x i32> +! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], +! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]]) +! CHECK: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <2 x i64> +! CHECK: store <2 x i64> %[[bcall]], ptr %{{.*}}, align 16 +end subroutine vec_perm_test_u8 + +! 
CHECK-LABEL: vec_perm_test_r4 +subroutine vec_perm_test_r4(arg1, arg2, arg3) + vector(real(4)) :: arg1, arg2, r + vector(unsigned(1)) :: arg3 + r = vec_perm(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<4xi32> +! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<4xi32> +! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8> +! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) fastmath : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32> +! CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[bcall:.*]] = llvm.bitcast %[[call2]] : vector<4xi32> to vector<4xf32> +! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<4xf32>) -> !fir.vector<4:f32> +! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<4xi32> +! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<4xf32> to vector<4xi32> +! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8 +! 
CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]] : vector<16xi8> +! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32> +! CHECK-LLVMIR: %[[bcall:.*]] = llvm.bitcast %[[call]] : vector<4xi32> to vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[bcall]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32> +! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <4 x i32> +! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], +! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]]) +! CHECK: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <4 x float> +! CHECK: store <4 x float> %[[bcall]], ptr %{{.*}}, align 16 +end subroutine vec_perm_test_r4 + +! CHECK-LABEL: vec_perm_test_r8 +subroutine vec_perm_test_r8(arg1, arg2, arg3) + vector(real(8)) :: arg1, arg2, r + vector(unsigned(1)) :: arg3 + r = vec_perm(arg1, arg2, arg3) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8> +! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<4xi32> +! 
CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<4xi32> +! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8 +! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8> +! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8> +! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) fastmath : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32> +! CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32> +! CHECK-FIR: %[[bcall:.*]] = llvm.bitcast %[[call2]] : vector<4xi32> to vector<2xf64> +! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<2xf64>) -> !fir.vector<2:f64> +! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<2xf64> to vector<4xi32> +! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<2xf64> to vector<4xi32> +! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8 +! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8> +! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]] : vector<16xi8> +! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32> +! CHECK-LLVMIR: %[[bcall:.*]] = llvm.bitcast %[[call]] : vector<4xi32> to vector<2xf64> +! CHECK-LLVMIR: llvm.store %[[bcall]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16 +! 
CHECK: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <4 x i32> +! CHECK: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <4 x i32> +! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], +! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]]) +! CHECK: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <2 x double> +! CHECK: store <2 x double> %[[bcall]], ptr %{{.*}}, align 16 +end subroutine vec_perm_test_r8 + +! CHECK-LABEL: vec_permi_test_i8i1 +subroutine vec_permi_test_i8i1(arg1, arg2, arg3) + vector(integer(8)) :: arg1, arg2, r + r = vec_permi(arg1, arg2, 3_1) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [1, 3] : vector<2xi64>, vector<2xi64> +! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:i64> +! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr> +! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 3] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> +! CHECK: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16 +end subroutine vec_permi_test_i8i1 + +! CHECK-LABEL: vec_permi_test_i8i2 +subroutine vec_permi_test_i8i2(arg1, arg2, arg3) + vector(integer(8)) :: arg1, arg2, r + r = vec_permi(arg1, arg2, 2_2) + +! 
CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [1, 2] : vector<2xi64>, vector<2xi64> +! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:i64> +! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr> +! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 2] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> +! CHECK: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16 +end subroutine vec_permi_test_i8i2 + +! CHECK-LABEL: vec_permi_test_i8i4 +subroutine vec_permi_test_i8i4(arg1, arg2, arg3) + vector(integer(8)) :: arg1, arg2, r + r = vec_permi(arg1, arg2, 1_4) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 3] : vector<2xi64>, vector<2xi64> +! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:i64> +! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr> +! 
CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 3] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> +! CHECK: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16 +end subroutine vec_permi_test_i8i4 + +! CHECK-LABEL: vec_permi_test_i8i8 +subroutine vec_permi_test_i8i8(arg1, arg2, arg3) + vector(integer(8)) :: arg1, arg2, r + r = vec_permi(arg1, arg2, 0_8) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64> +! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 2] : vector<2xi64>, vector<2xi64> +! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:i64> +! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr> +! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 2] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> +! CHECK: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16 +end subroutine vec_permi_test_i8i8 + +! CHECK-LABEL: vec_permi_test_u8i1 +subroutine vec_permi_test_u8i1(arg1, arg2, arg3) + vector(unsigned(8)) :: arg1, arg2, r + r = vec_permi(arg1, arg2, 3_1) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! 
CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [1, 3] : vector<2xi64>, vector<2xi64> +! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:ui64> +! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr> +! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 3] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> +! CHECK: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16 +end subroutine vec_permi_test_u8i1 + +! CHECK-LABEL: vec_permi_test_u8i2 +subroutine vec_permi_test_u8i2(arg1, arg2, arg3) + vector(unsigned(8)) :: arg1, arg2, r + r = vec_permi(arg1, arg2, 2_2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [1, 2] : vector<2xi64>, vector<2xi64> +! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:ui64> +! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr> +! 
CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 2] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> +! CHECK: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16 +end subroutine vec_permi_test_u8i2 + +! CHECK-LABEL: vec_permi_test_u8i4 +subroutine vec_permi_test_u8i4(arg1, arg2, arg3) + vector(unsigned(8)) :: arg1, arg2, r + r = vec_permi(arg1, arg2, 1_4) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 3] : vector<2xi64>, vector<2xi64> +! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:ui64> +! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr> +! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 3] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> +! CHECK: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16 +end subroutine vec_permi_test_u8i4 + +! CHECK-LABEL: vec_permi_test_u8i8 +subroutine vec_permi_test_u8i8(arg1, arg2, arg3) + vector(unsigned(8)) :: arg1, arg2, r + r = vec_permi(arg1, arg2, 0_8) + +! 
CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64> +! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 2] : vector<2xi64>, vector<2xi64> +! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:ui64> +! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr> +! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 2] : vector<2xi64> +! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 +! CHECK: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> +! CHECK: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16 +end subroutine vec_permi_test_u8i8 + +! CHECK-LABEL: vec_permi_test_r4i1 +subroutine vec_permi_test_r4i1(arg1, arg2, arg3) + vector(real(4)) :: arg1, arg2, r + r = vec_permi(arg1, arg2, 3_1) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<2xf64> +! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<2xf64> +! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [1, 3] : vector<2xf64>, vector<2xf64> +! CHECK-FIR: %[[bshuf:.*]] = llvm.bitcast %[[shuf]] : vector<2xf64> to vector<4xf32> +! 
CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[bshuf]] : (vector<4xf32>) -> !fir.vector<4:f32> +! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<2xf64> +! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<4xf32> to vector<2xf64> +! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[barg1]], %[[barg2]] [1, 3] : vector<2xf64> +! CHECK-LLVMIR: %[[bshuf:.*]] = llvm.bitcast %[[shuf]] : vector<2xf64> to vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[bshuf]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <2 x double> +! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <2 x double> +! CHECK: %[[shuf:.*]] = shufflevector <2 x double> %[[barg1]], <2 x double> %[[barg2]], <2 x i32> +! CHECK: %[[bshuf:.*]] = bitcast <2 x double> %[[shuf]] to <4 x float> +! CHECK: store <4 x float> %[[bshuf]], ptr %{{.*}}, align 16 +end subroutine vec_permi_test_r4i1 + +! CHECK-LABEL: vec_permi_test_r4i2 +subroutine vec_permi_test_r4i2(arg1, arg2, arg3) + vector(real(4)) :: arg1, arg2, r + r = vec_permi(arg1, arg2, 2_2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<2xf64> +! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<2xf64> +! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [1, 2] : vector<2xf64>, vector<2xf64> +! 
CHECK-FIR: %[[bshuf:.*]] = llvm.bitcast %[[shuf]] : vector<2xf64> to vector<4xf32> +! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[bshuf]] : (vector<4xf32>) -> !fir.vector<4:f32> +! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<2xf64> +! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<4xf32> to vector<2xf64> +! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[barg1]], %[[barg2]] [1, 2] : vector<2xf64> +! CHECK-LLVMIR: %[[bshuf:.*]] = llvm.bitcast %[[shuf]] : vector<2xf64> to vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[bshuf]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <2 x double> +! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <2 x double> +! CHECK: %[[shuf:.*]] = shufflevector <2 x double> %[[barg1]], <2 x double> %[[barg2]], <2 x i32> +! CHECK: %[[bshuf:.*]] = bitcast <2 x double> %[[shuf]] to <4 x float> +! CHECK: store <4 x float> %[[bshuf]], ptr %{{.*}}, align 16 +end subroutine vec_permi_test_r4i2 + +! CHECK-LABEL: vec_permi_test_r4i4 +subroutine vec_permi_test_r4i4(arg1, arg2, arg3) + vector(real(4)) :: arg1, arg2, r + r = vec_permi(arg1, arg2, 1_4) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<2xf64> +! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<2xf64> +! 
CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [0, 3] : vector<2xf64>, vector<2xf64> +! CHECK-FIR: %[[bshuf:.*]] = llvm.bitcast %[[shuf]] : vector<2xf64> to vector<4xf32> +! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[bshuf]] : (vector<4xf32>) -> !fir.vector<4:f32> +! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<2xf64> +! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<4xf32> to vector<2xf64> +! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[barg1]], %[[barg2]] [0, 3] : vector<2xf64> +! CHECK-LLVMIR: %[[bshuf:.*]] = llvm.bitcast %[[shuf]] : vector<2xf64> to vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[bshuf]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <2 x double> +! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <2 x double> +! CHECK: %[[shuf:.*]] = shufflevector <2 x double> %[[barg1]], <2 x double> %[[barg2]], <2 x i32> +! CHECK: %[[bshuf:.*]] = bitcast <2 x double> %[[shuf]] to <4 x float> +! CHECK: store <4 x float> %[[bshuf]], ptr %{{.*}}, align 16 +end subroutine vec_permi_test_r4i4 + +! CHECK-LABEL: vec_permi_test_r4i8 +subroutine vec_permi_test_r4i8(arg1, arg2, arg3) + vector(real(4)) :: arg1, arg2, r + r = vec_permi(arg1, arg2, 0_8) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<2xf64> +! 
CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<2xf64> +! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [0, 2] : vector<2xf64>, vector<2xf64> +! CHECK-FIR: %[[bshuf:.*]] = llvm.bitcast %[[shuf]] : vector<2xf64> to vector<4xf32> +! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[bshuf]] : (vector<4xf32>) -> !fir.vector<4:f32> +! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<2xf64> +! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<4xf32> to vector<2xf64> +! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[barg1]], %[[barg2]] [0, 2] : vector<2xf64> +! CHECK-LLVMIR: %[[bshuf:.*]] = llvm.bitcast %[[shuf]] : vector<2xf64> to vector<4xf32> +! CHECK-LLVMIR: llvm.store %[[bshuf]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <2 x double> +! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <2 x double> +! CHECK: %[[shuf:.*]] = shufflevector <2 x double> %[[barg1]], <2 x double> %[[barg2]], <2 x i32> +! CHECK: %[[bshuf:.*]] = bitcast <2 x double> %[[shuf]] to <4 x float> +! CHECK: store <4 x float> %[[bshuf]], ptr %{{.*}}, align 16 +end subroutine vec_permi_test_r4i8 + +! CHECK-LABEL: vec_permi_test_r8i1 +subroutine vec_permi_test_r8i1(arg1, arg2, arg3) + vector(real(8)) :: arg1, arg2, r + r = vec_permi(arg1, arg2, 3_1) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64> +! 
CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<2xf64> +! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<2xf64> +! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [1, 3] : vector<2xf64>, vector<2xf64> +! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xf64>) -> !fir.vector<2:f64> +! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 3] : vector<2xf64> +! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[shuf:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> +! CHECK: store <2 x double> %[[shuf]], ptr %{{.*}}, align 16 +end subroutine vec_permi_test_r8i1 + +! CHECK-LABEL: vec_permi_test_r8i2 +subroutine vec_permi_test_r8i2(arg1, arg2, arg3) + vector(real(8)) :: arg1, arg2, r + r = vec_permi(arg1, arg2, 2_2) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<2xf64> +! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<2xf64> +! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [1, 2] : vector<2xf64>, vector<2xf64> +! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xf64>) -> !fir.vector<2:f64> +! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! 
CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 2] : vector<2xf64> +! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[shuf:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> +! CHECK: store <2 x double> %[[shuf]], ptr %{{.*}}, align 16 +end subroutine vec_permi_test_r8i2 + +! CHECK-LABEL: vec_permi_test_r8i4 +subroutine vec_permi_test_r8i4(arg1, arg2, arg3) + vector(real(8)) :: arg1, arg2, r + r = vec_permi(arg1, arg2, 1_4) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<2xf64> +! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<2xf64> +! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [0, 3] : vector<2xf64>, vector<2xf64> +! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xf64>) -> !fir.vector<2:f64> +! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 3] : vector<2xf64> +! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[shuf:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> +! 
CHECK: store <2 x double> %[[shuf]], ptr %{{.*}}, align 16 +end subroutine vec_permi_test_r8i4 + +! CHECK-LABEL: vec_permi_test_r8i8 +subroutine vec_permi_test_r8i8(arg1, arg2, arg3) + vector(real(8)) :: arg1, arg2, r + r = vec_permi(arg1, arg2, 0_8) + +! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<2xf64> +! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<2xf64> +! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [0, 2] : vector<2xf64>, vector<2xf64> +! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xf64>) -> !fir.vector<2:f64> +! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref> + +! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr> +! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 2] : vector<2xf64> +! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr> + +! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[shuf:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> +! CHECK: store <2 x double> %[[shuf]], ptr %{{.*}}, align 16 +end subroutine vec_permi_test_r8i8 diff --git a/flang/test/Semantics/PowerPC/ppc-vector-intrinsics.f90 b/flang/test/Semantics/PowerPC/ppc-vector-intrinsics.f90 --- a/flang/test/Semantics/PowerPC/ppc-vector-intrinsics.f90 +++ b/flang/test/Semantics/PowerPC/ppc-vector-intrinsics.f90 @@ -21,3 +21,12 @@ ! 
ERROR: Argument #2 must be a constant expression in range 0-31 rr = vec_ctf(arg1, 37) end program test + +subroutine test_vec_permi() ! vec_permi's third argument must be a constant in 0-3; a variable and the value 11 are both rejected here + vector(integer(8)) :: arg1, arg2, r + integer :: arg3 +!ERROR: Actual argument #3 must be a constant expression + r = vec_permi(arg1, arg2, arg3) +! ERROR: Argument #3 must be a constant expression in range 0-3 + r = vec_permi(arg1, arg2, 11) +end subroutine test_vec_permi