Index: flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h =================================================================== --- flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h +++ flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h @@ -29,6 +29,10 @@ Convert, Ctf, Cvf, + Ld, + Lde, + Ldl, + Lxvp, Mergeh, Mergel, Msub, @@ -49,6 +53,8 @@ Ste, Stxv, Sub, + Xld2, + Xlw4, Xor, Xst, Xst_be, @@ -192,6 +198,10 @@ fir::ExtendedValue genVecPerm(mlir::Type resultType, llvm::ArrayRef args); + template + fir::ExtendedValue genVecLdCallGrp(mlir::Type resultType, + llvm::ArrayRef args); + template fir::ExtendedValue genVecNmaddMsub(mlir::Type resultType, llvm::ArrayRef args); Index: flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp =================================================================== --- flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp +++ flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp @@ -129,6 +129,26 @@ static_cast(&PI::genVecInsert), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, /*isElemental=*/true}, + {"__ppc_vec_ld", + static_cast( + &PI::genVecLdCallGrp), + {{{"arg1", asValue}, {"arg2", asAddr}}}, + /*isElemental=*/false}, + {"__ppc_vec_lde", + static_cast( + &PI::genVecLdCallGrp), + {{{"arg1", asValue}, {"arg2", asAddr}}}, + /*isElemental=*/false}, + {"__ppc_vec_ldl", + static_cast( + &PI::genVecLdCallGrp), + {{{"arg1", asValue}, {"arg2", asAddr}}}, + /*isElemental=*/false}, + {"__ppc_vec_lxvp", + static_cast( + &PI::genVecLdCallGrp), + {{{"arg1", asValue}, {"arg2", asAddr}}}, + /*isElemental=*/false}, {"__ppc_vec_mergeh", static_cast( &PI::genVecMerge), @@ -228,6 +248,16 @@ &PI::genVecAddAndMulSubXor), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, + {"__ppc_vec_xld2_", + static_cast( + &PI::genVecLdCallGrp), + {{{"arg1", asValue}, {"arg2", asAddr}}}, + /*isElemental=*/false}, + {"__ppc_vec_xlw4_", + static_cast( + &PI::genVecLdCallGrp), + {{{"arg1", asValue}, {"arg2", asAddr}}}, + 
/*isElemental=*/false}, {"__ppc_vec_xor", static_cast( &PI::genVecAddAndMulSubXor), @@ -1273,6 +1303,132 @@ return builder.createConvert(loc, resultType, callOp); } +static mlir::Value addOffsetToAddress(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value baseAddr, + mlir::Value offset) { + auto typeExtent{fir::SequenceType::getUnknownExtent()}; + // Construct an !fir.ref> type + auto arrRefTy{builder.getRefType(fir::SequenceType::get( + {typeExtent}, mlir::IntegerType::get(builder.getContext(), 8)))}; + // Convert arg to !fir.ref> + auto resAddr{builder.create(loc, arrRefTy, baseAddr)}; + + return builder.create(loc, arrRefTy, resAddr, offset); +} + +static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value v, + int64_t len) { + assert(v.getType().isa()); + assert(len > 0); + llvm::SmallVector mask; + for (int64_t i = 0; i < len; ++i) { + mask.push_back(len - 1 - i); + } + auto undefVec{builder.create(loc, v.getType())}; + return builder.create(loc, v, undefVec, mask); +} + +// VEC_LD, VEC_LDE, VEC_LDL, VEC_LXVP, VEC_XLD2, VEC_XLW4 +template +fir::ExtendedValue +PPCIntrinsicLibrary::genVecLdCallGrp(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 2); + auto context{builder.getContext()}; + auto arg0{getBase(args[0])}; + auto arg1{getBase(args[1])}; + + // Prepare the return type in FIR. 
+ auto vecResTyInfo{getVecTypeFromFirType(resultType)}; + auto mlirTy{vecResTyInfo.toMlirVectorType(context)}; + auto firTy{vecResTyInfo.toFirVectorType()}; + + // llvm.ppc.altivec.lvx* returns <4xi32> + // Others, like "llvm.ppc.altivec.lvebx" too if arg2 is not of Integer type + const auto i32Ty{mlir::IntegerType::get(builder.getContext(), 32)}; + const auto mVecI32Ty{mlir::VectorType::get(4, i32Ty)}; + + // For vec_ld, need to convert arg0 from i64 to i32 + if (vop == VecOp::Ld && arg0.getType().getIntOrFloatBitWidth() == 64) + arg0 = builder.createConvert(loc, i32Ty, arg0); + + // Add the %val of arg0 to %addr of arg1 + auto addr{addOffsetToAddress(builder, loc, arg1, arg0)}; + llvm::SmallVector parsedArgs{addr}; + + mlir::Type intrinResTy{nullptr}; + llvm::StringRef fname{}; + switch (vop) { + case VecOp::Ld: + fname = "llvm.ppc.altivec.lvx"; + intrinResTy = mVecI32Ty; + break; + case VecOp::Lde: + switch (vecResTyInfo.eleTy.getIntOrFloatBitWidth()) { + case 8: + fname = "llvm.ppc.altivec.lvebx"; + intrinResTy = mlirTy; + break; + case 16: + fname = "llvm.ppc.altivec.lvehx"; + intrinResTy = mlirTy; + break; + case 32: + fname = "llvm.ppc.altivec.lvewx"; + if (mlir::isa(vecResTyInfo.eleTy)) + intrinResTy = mlirTy; + else + intrinResTy = mVecI32Ty; + break; + default: + llvm_unreachable("invalid vector for vec_lde"); + } + break; + case VecOp::Ldl: + fname = "llvm.ppc.altivec.lvxl"; + intrinResTy = mVecI32Ty; + break; + case VecOp::Lxvp: + fname = "llvm.ppc.vsx.lxvp"; + intrinResTy = fir::VectorType::get(256, mlir::IntegerType::get(context, 1)); + break; + case VecOp::Xld2: { + fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvd2x.be" + : "llvm.ppc.vsx.lxvd2x"; + // llvm.ppc.altivec.lxvd2x* returns <2 x double> + intrinResTy = mlir::VectorType::get(2, mlir::FloatType::getF64(context)); + } break; + case VecOp::Xlw4: + fname = isBEVecElemOrderOnLE() ? 
"llvm.ppc.vsx.lxvw4x.be" + : "llvm.ppc.vsx.lxvw4x"; + // llvm.ppc.altivec.lxvw4x* returns <4xi32> + intrinResTy = mVecI32Ty; + break; + default: + llvm_unreachable("invalid vector operation for generator"); + } + + auto funcType{ + mlir::FunctionType::get(context, {addr.getType()}, {intrinResTy})}; + auto funcOp{builder.addNamedFunction(loc, fname, funcType)}; + auto result{ + builder.create(loc, funcOp, parsedArgs).getResult(0)}; + + if (vop == VecOp::Lxvp) + return result; + + if (intrinResTy != mlirTy) + result = builder.create(loc, mlirTy, result); + + if (vop != VecOp::Xld2 && vop != VecOp::Xlw4 && isBEVecElemOrderOnLE()) + return builder.createConvert( + loc, firTy, + reverseVectorElements(builder, loc, result, vecResTyInfo.len)); + + return builder.createConvert(loc, firTy, result); +} + // VEC_NMADD, VEC_MSUB template fir::ExtendedValue @@ -1715,33 +1871,6 @@ } } -static mlir::Value addOffsetToAddress(fir::FirOpBuilder &builder, - mlir::Location loc, mlir::Value baseAddr, - mlir::Value offset) { - auto typeExtent{fir::SequenceType::getUnknownExtent()}; - // Construct an !fir.ref> type - auto arrRefTy{builder.getRefType(fir::SequenceType::get( - {typeExtent}, mlir::IntegerType::get(builder.getContext(), 8)))}; - // Convert arg to !fir.ref> - auto resAddr{builder.create(loc, arrRefTy, baseAddr)}; - - return builder.create(loc, arrRefTy, resAddr, offset); -} - -static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder, - mlir::Location loc, mlir::Value v, - int64_t len) { - assert(v.getType().isa()); - assert(len > 0); - llvm::SmallVector mask; - for (int64_t i = 0; i < len; ++i) { - mask.push_back(len - 1 - i); - } - - auto undefVec{builder.create(loc, v.getType())}; - return builder.create(loc, v, undefVec, mask); -} - // VEC_ST, VEC_STE template void PPCIntrinsicLibrary::genVecStore(llvm::ArrayRef args) { Index: flang/module/__ppc_intrinsics.f90 =================================================================== --- 
flang/module/__ppc_intrinsics.f90 +++ flang/module/__ppc_intrinsics.f90 @@ -150,6 +150,87 @@ !dir$ ignore_tkr(k) arg2; \ end function ; +! vector(i) function f(i, integer) +#define FUNC_VII0I(VKIND) \ + pure vector(integer(VKIND)) function func_vi##VKIND##i0i##VKIND(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + integer(VKIND), intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function ; + +! vector(r) function f(i, real) +#define FUNC_VRI0R(VKIND) \ + pure vector(real(VKIND)) function func_vr##VKIND##i0r##VKIND(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + real(VKIND), intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function ; + +! vector(i) function f(i, vector(i)) +#define FUNC_VII0VI(VKIND) \ + pure vector(integer(VKIND)) function func_vi##VKIND##i0vi##VKIND(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + vector(integer(VKIND)), intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function ; + +! vector(u) function f(i, vector(u)) +#define FUNC_VUI0VU(VKIND) \ + pure vector(unsigned(VKIND)) function func_vu##VKIND##i0vu##VKIND(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + vector(unsigned(VKIND)), intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function ; + +! vector(r) function f(i, vector(r)) +#define FUNC_VRI0VR(VKIND) \ + pure vector(real(VKIND)) function func_vr##VKIND##i0vr##VKIND(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + vector(real(VKIND)), intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function ; + +! __vector_pair function f(i, vector(i)) +#define FUNC_VPI0VI(VKIND) \ + pure __vector_pair function func_vpi0vi##VKIND(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + vector(integer(VKIND)), intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function; + +! 
__vector_pair function f(i, vector(u)) +#define FUNC_VPI0VU(VKIND) \ + pure __vector_pair function func_vpi0vu##VKIND(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + vector(unsigned(VKIND)), intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function; + +! __vector_pair function f(i, vector(r)) +#define FUNC_VPI0VR(VKIND) \ + pure __vector_pair function func_vpi0vr##VKIND(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + vector(real(VKIND)), intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function; + +! __vector_pair function f(i, __vector_pair) +#define FUNC_VPI0VP \ + pure __vector_pair function func_vpi0vp(arg1, arg2); \ + integer(8), intent(in) :: arg1; \ + !dir$ ignore_tkr(k) arg1; \ + __vector_pair, intent(in) :: arg2; \ + !dir$ ignore_tkr(r) arg2; \ + end function; + ! The following macros are specific for the vec_convert(v, mold) intrinsics as ! the argument keywords are different from the other vector intrinsics. ! 
@@ -203,10 +284,28 @@ ELEM_FUNC_IVRVR(4,4) ELEM_FUNC_IVRVR(4,8) ELEM_FUNC_VRVII(4) ELEM_FUNC_VRVII(8) ELEM_FUNC_VRVUI(4) ELEM_FUNC_VRVUI(8) + FUNC_VII0VI(1) FUNC_VII0VI(2) FUNC_VII0VI(4) FUNC_VII0VI(8) + FUNC_VUI0VU(1) FUNC_VUI0VU(2) FUNC_VUI0VU(4) FUNC_VUI0VU(8) + FUNC_VRI0VR(4) FUNC_VRI0VR(8) + FUNC_VII0I(1) FUNC_VII0I(2) FUNC_VII0I(4) FUNC_VII0I(8) + FUNC_VRI0R(4) FUNC_VRI0R(8) + FUNC_VPI0VI(1) FUNC_VPI0VI(2) FUNC_VPI0VI(4) FUNC_VPI0VI(8) + FUNC_VPI0VU(1) FUNC_VPI0VU(2) FUNC_VPI0VU(4) FUNC_VPI0VU(8) + FUNC_VPI0VR(4) FUNC_VPI0VR(8) + FUNC_VPI0VP #undef FUNC_VEC_CONVERT_VRVIVR #undef FUNC_VEC_CONVERT_VUVIVU #undef FUNC_VEC_CONVERT_VIVIVI +#undef FUNC_VPI0VP +#undef FUNC_VPI0VR +#undef FUNC_VPI0VU +#undef FUNC_VPI0VI +#undef FUNC_VRI0VR +#undef FUNC_VUI0VU +#undef FUNC_VII0VI +#undef FUNC_VRI0R +#undef FUNC_VII0I #undef ELEM_FUNC_RVRI #undef ELEM_FUNC_VRVUI #undef ELEM_FUNC_IVII @@ -913,6 +1012,154 @@ #undef VU_VI_VI #undef VI_VI_VI +!------------------------------------------------------- +! vector function(integer, i/u/r/vector) +!------------------------------------------------------- +! 
i0 means the integer argument has ignore_tkr(k) +#define VI_I0_VI(NAME, VKIND) __ppc_##NAME##_vi##VKIND##i0##vi##VKIND +#define VU_I0_VU(NAME, VKIND) __ppc_##NAME##_vu##VKIND##i0##vu##VKIND +#define VR_I0_VR(NAME, VKIND) __ppc_##NAME##_vr##VKIND##i0##vr##VKIND +#define VI_I0_I(NAME, VKIND) __ppc_##NAME##_vi##VKIND##i0##i##VKIND +#define VR_I0_R(NAME, VKIND) __ppc_##NAME##_vr##VKIND##i0##r##VKIND + +#define VEC_VI_I0_VI(NAME, VKIND) \ + procedure(func_vi##VKIND##i0##vi##VKIND) :: VI_I0_VI(NAME, VKIND); +#define VEC_VU_I0_VU(NAME, VKIND) \ + procedure(func_vu##VKIND##i0##vu##VKIND) :: VU_I0_VU(NAME, VKIND); +#define VEC_VR_I0_VR(NAME, VKIND) \ + procedure(func_vr##VKIND##i0##vr##VKIND) :: VR_I0_VR(NAME, VKIND); +#define VEC_VI_I0_I(NAME, VKIND) \ + procedure(func_vi##VKIND##i0##i##VKIND) :: VI_I0_I(NAME, VKIND); +#define VEC_VR_I0_R(NAME, VKIND) \ + procedure(func_vr##VKIND##i0##r##VKIND) :: VR_I0_R(NAME, VKIND); + +! vec_ld + VEC_VI_I0_VI(vec_ld,1) VEC_VI_I0_VI(vec_ld,2) VEC_VI_I0_VI(vec_ld,4) + VEC_VU_I0_VU(vec_ld,1) VEC_VU_I0_VU(vec_ld,2) VEC_VU_I0_VU(vec_ld,4) + VEC_VR_I0_VR(vec_ld,4) + VEC_VI_I0_I(vec_ld,1) VEC_VI_I0_I(vec_ld,2) VEC_VI_I0_I(vec_ld,4) + VEC_VR_I0_R(vec_ld,4) + interface vec_ld + procedure :: VI_I0_VI(vec_ld,1), VI_I0_VI(vec_ld,2), VI_I0_VI(vec_ld,4) + procedure :: VU_I0_VU(vec_ld,1), VU_I0_VU(vec_ld,2), VU_I0_VU(vec_ld,4) + procedure :: VR_I0_VR(vec_ld,4) + procedure :: VI_I0_I(vec_ld,1), VI_I0_I(vec_ld,2), VI_I0_I(vec_ld,4) + procedure :: VR_I0_R(vec_ld,4) + end interface + public :: vec_ld + +! vec_lde + VEC_VI_I0_I(vec_lde,1) VEC_VI_I0_I(vec_lde,2) VEC_VI_I0_I(vec_lde,4) + VEC_VR_I0_R(vec_lde,4) + interface vec_lde + procedure :: VI_I0_I(vec_lde,1), VI_I0_I(vec_lde,2), VI_I0_I(vec_lde,4) + procedure :: VR_I0_R(vec_lde,4) + end interface + public :: vec_lde + +! 
vec_ldl + VEC_VI_I0_VI(vec_ldl,1) VEC_VI_I0_VI(vec_ldl,2) VEC_VI_I0_VI(vec_ldl,4) + VEC_VU_I0_VU(vec_ldl,1) VEC_VU_I0_VU(vec_ldl,2) VEC_VU_I0_VU(vec_ldl,4) + VEC_VR_I0_VR(vec_ldl,4) + VEC_VI_I0_I(vec_ldl,1) VEC_VI_I0_I(vec_ldl,2) VEC_VI_I0_I(vec_ldl,4) + VEC_VR_I0_R(vec_ldl,4) + interface vec_ldl + procedure :: VI_I0_VI(vec_ldl,1), VI_I0_VI(vec_ldl,2), VI_I0_VI(vec_ldl,4) + procedure :: VU_I0_VU(vec_ldl,1), VU_I0_VU(vec_ldl,2), VU_I0_VU(vec_ldl,4) + procedure :: VR_I0_VR(vec_ldl,4) + procedure :: VI_I0_I(vec_ldl,1), VI_I0_I(vec_ldl,2), VI_I0_I(vec_ldl,4) + procedure :: VR_I0_R(vec_ldl,4) + end interface + public :: vec_ldl + +! vec_xld2 + VEC_VI_I0_VI(vec_xld2_,1) VEC_VI_I0_VI(vec_xld2_,2) VEC_VI_I0_VI(vec_xld2_,4) VEC_VI_I0_VI(vec_xld2_,8) + VEC_VU_I0_VU(vec_xld2_,1) VEC_VU_I0_VU(vec_xld2_,2) VEC_VU_I0_VU(vec_xld2_,4) VEC_VU_I0_VU(vec_xld2_,8) + VEC_VR_I0_VR(vec_xld2_,4) VEC_VR_I0_VR(vec_xld2_,8) + VEC_VI_I0_I(vec_xld2_,1) VEC_VI_I0_I(vec_xld2_,2) VEC_VI_I0_I(vec_xld2_,4) VEC_VI_I0_I(vec_xld2_,8) + VEC_VR_I0_R(vec_xld2_,4) VEC_VR_I0_R(vec_xld2_,8) + interface vec_xld2 + procedure :: VI_I0_VI(vec_xld2_,1), VI_I0_VI(vec_xld2_,2), VI_I0_VI(vec_xld2_,4), VI_I0_VI(vec_xld2_,8) + procedure :: VU_I0_VU(vec_xld2_,1), VU_I0_VU(vec_xld2_,2), VU_I0_VU(vec_xld2_,4), VU_I0_VU(vec_xld2_,8) + procedure :: VR_I0_VR(vec_xld2_,4), VR_I0_VR(vec_xld2_,8) + procedure :: VI_I0_I(vec_xld2_,1), VI_I0_I(vec_xld2_,2), VI_I0_I(vec_xld2_,4), VI_I0_I(vec_xld2_,8) + procedure :: VR_I0_R(vec_xld2_,4), VR_I0_R(vec_xld2_,8) + end interface + public :: vec_xld2 + +! 
vec_xlw4 + VEC_VI_I0_VI(vec_xlw4_,1) VEC_VI_I0_VI(vec_xlw4_,2) + VEC_VU_I0_VU(vec_xlw4_,1) VEC_VU_I0_VU(vec_xlw4_,2) VEC_VU_I0_VU(vec_xlw4_,4) + VEC_VR_I0_VR(vec_xlw4_,4) + VEC_VI_I0_I(vec_xlw4_,1) VEC_VI_I0_I(vec_xlw4_,2) VEC_VI_I0_I(vec_xlw4_,4) + VEC_VR_I0_R(vec_xlw4_,4) + interface vec_xlw4 + procedure :: VI_I0_VI(vec_xlw4_,1), VI_I0_VI(vec_xlw4_,2) + procedure :: VU_I0_VU(vec_xlw4_,1), VU_I0_VU(vec_xlw4_,2), VU_I0_VU(vec_xlw4_,4) + procedure :: VR_I0_VR(vec_xlw4_,4) + procedure :: VI_I0_I(vec_xlw4_,1), VI_I0_I(vec_xlw4_,2), VI_I0_I(vec_xlw4_,4) + procedure :: VR_I0_R(vec_xlw4_,4) + end interface + public :: vec_xlw4 + +#undef VEC_VR_I0_R +#undef VEC_VI_I0_I +#undef VEC_VR_I0_VR +#undef VEC_VU_I0_VU +#undef VEC_VI_I0_VI +#undef VR_I0_R +#undef VI_I0_I +#undef VR_I0_VR +#undef VU_I0_VU +#undef VI_I0_VI + +!------------------------------------------------------- +! __vector_pair function(integer, vector/__vector_pair) +!------------------------------------------------------- +#define VP_I0_VI(NAME, VKIND) __ppc_##NAME##_vpi0##vi##VKIND +#define VP_I0_VU(NAME, VKIND) __ppc_##NAME##_vpi0##vu##VKIND +#define VP_I0_VR(NAME, VKIND) __ppc_##NAME##_vpi0##vr##VKIND +#define VP_I0_VP(NAME) __ppc_##NAME##_vpi0vp0 + +#define VEC_VP_I0_VI(NAME, VKIND) \ + procedure(func_vpi0vi##VKIND) :: VP_I0_VI(NAME, VKIND); +#define VEC_VP_I0_VU(NAME, VKIND) \ + procedure(func_vpi0vu##VKIND) :: VP_I0_VU(NAME, VKIND); +#define VEC_VP_I0_VR(NAME, VKIND) \ + procedure(func_vpi0vr##VKIND) :: VP_I0_VR(NAME, VKIND); +#define VEC_VP_I0_VP(NAME) procedure(func_vpi0vp) :: VP_I0_VP(NAME); + +! 
vec_lxvp + VEC_VP_I0_VI(vec_lxvp,1) VEC_VP_I0_VI(vec_lxvp,2) VEC_VP_I0_VI(vec_lxvp,4) VEC_VP_I0_VI(vec_lxvp,8) + VEC_VP_I0_VU(vec_lxvp,1) VEC_VP_I0_VU(vec_lxvp,2) VEC_VP_I0_VU(vec_lxvp,4) VEC_VP_I0_VU(vec_lxvp,8) + VEC_VP_I0_VR(vec_lxvp,4) VEC_VP_I0_VR(vec_lxvp,8) + VEC_VP_I0_VP(vec_lxvp) + interface vec_lxvp + procedure :: VP_I0_VI(vec_lxvp,1), VP_I0_VI(vec_lxvp,2), VP_I0_VI(vec_lxvp,4), VP_I0_VI(vec_lxvp,8) + procedure :: VP_I0_VU(vec_lxvp,1), VP_I0_VU(vec_lxvp,2), VP_I0_VU(vec_lxvp,4), VP_I0_VU(vec_lxvp,8) + procedure :: VP_I0_VR(vec_lxvp,4), VP_I0_VR(vec_lxvp,8) + procedure :: VP_I0_VP(vec_lxvp) + end interface vec_lxvp + public :: vec_lxvp + +! vsx_lxvp (alias to vec_lxvp) + interface vsx_lxvp + procedure :: VP_I0_VI(vec_lxvp,1), VP_I0_VI(vec_lxvp,2), VP_I0_VI(vec_lxvp,4), VP_I0_VI(vec_lxvp,8) + procedure :: VP_I0_VU(vec_lxvp,1), VP_I0_VU(vec_lxvp,2), VP_I0_VU(vec_lxvp,4), VP_I0_VU(vec_lxvp,8) + procedure :: VP_I0_VR(vec_lxvp,4), VP_I0_VR(vec_lxvp,8) + procedure :: VP_I0_VP(vec_lxvp) + end interface vsx_lxvp + public :: vsx_lxvp + +#undef VEC_VP_I0_VP +#undef VEC_VP_I0_VR +#undef VEC_VP_I0_VU +#undef VEC_VP_I0_VI +#undef VP_I0_VP +#undef VP_I0_VR +#undef VP_I0_VU +#undef VP_I0_VI + !----------------------------------------- ! vector function(vector, vector, vector) !----------------------------------------- Index: flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 =================================================================== --- /dev/null +++ flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 @@ -0,0 +1,522 @@ +! RUN: bbc -emit-fir %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="FIR" %s +! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="LLVMIR" %s +! REQUIRES: target=powerpc{{.*}} + +!------------------- +! vec_ld +!------------------- + +! 
CHECK-LABEL: @vec_ld_testi8 +subroutine vec_ld_testi8(arg1, arg2, res) + integer(1) :: arg1 + vector(integer(1)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<16xi8> +! FIR: %[[undefv:.*]] = fir.undefined vector<16xi8> +! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<16xi8>) -> !fir.vector<16:i8> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <16 x i8> +! LLVMIR: %[[shflv:.*]] = shufflevector <16 x i8> %[[bc]], <16 x i8> undef, <16 x i32> +! LLVMIR: store <16 x i8> %[[shflv]], ptr %2, align 16 +end subroutine vec_ld_testi8 + +! CHECK-LABEL: @vec_ld_testi16 +subroutine vec_ld_testi16(arg1, arg2, res) + integer(2) :: arg1 + vector(integer(2)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<8xi16> +! FIR: %[[undefv:.*]] = fir.undefined vector<8xi16> +! 
FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <8 x i16> +! LLVMIR: %[[shflv:.*]] = shufflevector <8 x i16> %[[bc]], <8 x i16> undef, <8 x i32> +! LLVMIR: store <8 x i16> %[[shflv]], ptr %2, align 16 +end subroutine vec_ld_testi16 + +! CHECK-LABEL: @vec_ld_testi32 +subroutine vec_ld_testi32(arg1, arg2, res) + integer(4) :: arg1 + vector(integer(4)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32> +! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[shflv:.*]] = shufflevector <4 x i32> %[[ld]], <4 x i32> undef, <4 x i32> +! LLVMIR: store <4 x i32> %[[shflv]], ptr %2, align 16 +end subroutine vec_ld_testi32 + +! CHECK-LABEL: @vec_ld_testf32 +subroutine vec_ld_testf32(arg1, arg2, res) + integer(8) :: arg1 + vector(real(4)) :: arg2, res + res = vec_ld(arg1, arg2) + +! 
FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[i4:.*]] = fir.convert %[[arg1]] : (i64) -> i32 +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[i4]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32> +! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32> +! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[i4:.*]] = trunc i64 %[[arg1]] to i32 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[i4]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float> +! LLVMIR: %[[shflv:.*]] = shufflevector <4 x float> %[[bc]], <4 x float> undef, <4 x i32> +! LLVMIR: store <4 x float> %[[shflv]], ptr %2, align 16 +end subroutine vec_ld_testf32 + +! CHECK-LABEL: @vec_ld_testu32 +subroutine vec_ld_testu32(arg1, arg2, res) + integer(1) :: arg1 + vector(unsigned(4)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32> +! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:ui32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! 
LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[shflv:.*]] = shufflevector <4 x i32> %[[ld]], <4 x i32> undef, <4 x i32> +! LLVMIR: store <4 x i32> %[[shflv]], ptr %2, align 16 +end subroutine vec_ld_testu32 + +! CHECK-LABEL: @vec_ld_testi32a +subroutine vec_ld_testi32a(arg1, arg2, res) + integer(4) :: arg1 + integer(4) :: arg2(10) + vector(integer(4)) :: res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32> +! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[shflv:.*]] = shufflevector <4 x i32> %[[ld]], <4 x i32> undef, <4 x i32> +! LLVMIR: store <4 x i32> %[[shflv]], ptr %2, align 16 +end subroutine vec_ld_testi32a + +! CHECK-LABEL: @vec_ld_testf32av +subroutine vec_ld_testf32av(arg1, arg2, res) + integer(8) :: arg1 + vector(real(4)) :: arg2(2, 4, 8) + vector(real(4)) :: res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[i4:.*]] = fir.convert %[[arg1]] : (i64) -> i32 +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[i4]] : (!fir.ref>, i32) -> !fir.ref> +! 
FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32> +! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32> +! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[i4:.*]] = trunc i64 %[[arg1]] to i32 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[i4]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float> +! LLVMIR: %[[shflv:.*]] = shufflevector <4 x float> %[[bc]], <4 x float> undef, <4 x i32> +! LLVMIR: store <4 x float> %[[shflv]], ptr %2, align 16 +end subroutine vec_ld_testf32av + +! CHECK-LABEL: @vec_ld_testi32s +subroutine vec_ld_testi32s(arg1, arg2, res) + integer(4) :: arg1 + real(4) :: arg2 + vector(real(4)) :: res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32> +! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32> +! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! 
LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float> +! LLVMIR: %[[shflv:.*]] = shufflevector <4 x float> %[[bc]], <4 x float> undef, <4 x i32> +! LLVMIR: store <4 x float> %[[shflv]], ptr %2, align 16 +end subroutine vec_ld_testi32s + +!------------------- +! vec_lde +!------------------- + +! CHECK-LABEL: @vec_lde_testi8s +subroutine vec_lde_testi8s(arg1, arg2, res) + integer(1) :: arg1 + integer(1) :: arg2 + vector(integer(1)) :: res + res = vec_lde(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvebx(%[[addr]]) fastmath : (!fir.ref>) -> vector<16xi8> +! FIR: %[[undefv:.*]] = fir.undefined vector<16xi8> +! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<16xi8>) -> !fir.vector<16:i8> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvebx(ptr %[[addr]]) +! LLVMIR: %[[shflv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> +! LLVMIR: store <16 x i8> %[[shflv]], ptr %2, align 16 +end subroutine vec_lde_testi8s + +! CHECK-LABEL: @vec_lde_testi16a +subroutine vec_lde_testi16a(arg1, arg2, res) + integer(2) :: arg1 + integer(2) :: arg2(2, 11, 7) + vector(integer(2)) :: res + res = vec_lde(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! 
FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvehx(%[[addr]]) fastmath : (!fir.ref>) -> vector<8xi16> +! FIR: %[[undefv:.*]] = fir.undefined vector<8xi16> +! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <8 x i16> @llvm.ppc.altivec.lvehx(ptr %[[addr]]) +! LLVMIR: %[[shflv:.*]] = shufflevector <8 x i16> %[[ld]], <8 x i16> undef, <8 x i32> +! LLVMIR: store <8 x i16> %[[shflv]], ptr %2, align 16 +end subroutine vec_lde_testi16a + +! CHECK-LABEL: @vec_lde_testi32a +subroutine vec_lde_testi32a(arg1, arg2, res) + integer(4) :: arg1 + integer(4) :: arg2(5) + vector(integer(4)) :: res + res = vec_lde(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvewx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32> +! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvewx(ptr %[[addr]]) +! LLVMIR: %[[shflv:.*]] = shufflevector <4 x i32> %[[ld]], <4 x i32> undef, <4 x i32> +! LLVMIR: store <4 x i32> %[[shflv]], ptr %2, align 16 +end subroutine vec_lde_testi32a + +! 
CHECK-LABEL: @vec_lde_testf32a +subroutine vec_lde_testf32a(arg1, arg2, res) + integer(8) :: arg1 + real(4) :: arg2(11) + vector(real(4)) :: res + res = vec_lde(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvewx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32> +! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32> +! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32> +! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvewx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float> +! LLVMIR: %[[shflv:.*]] = shufflevector <4 x float> %[[bc]], <4 x float> undef, <4 x i32> +! LLVMIR: store <4 x float> %[[shflv]], ptr %2, align 16 +end subroutine vec_lde_testf32a + +!------------------- +! vec_xld2 +!------------------- + +! CHECK-LABEL: @vec_xld2_testi8a +subroutine vec_xld2_testi8a(arg1, arg2, res) + integer(1) :: arg1 + vector(integer(1)) :: arg2(4) + vector(integer(1)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<16xi8> +! 
FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <16 x i8> +! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testi8a + +! CHECK-LABEL: @vec_xld2_testi16a +subroutine vec_xld2_testi16a(arg1, arg2, res) + integer(2) :: arg1 + vector(integer(2)) :: arg2(4) + vector(integer(2)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<8xi16> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <8 x i16> +! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testi16a + +! CHECK-LABEL: @vec_xld2_testi32a +subroutine vec_xld2_testi32a(arg1, arg2, res) + integer(4) :: arg1 + vector(integer(4)) :: arg2(11) + vector(integer(4)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! 
FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<4xi32> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <4 x i32> +! LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testi32a + +! CHECK-LABEL: @vec_xld2_testi64a +subroutine vec_xld2_testi64a(arg1, arg2, res) + integer(8) :: arg1 + vector(integer(8)) :: arg2(31,7) + vector(integer(8)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<2xi64> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xi64>) -> !fir.vector<2:i64> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <2 x i64> +! LLVMIR: store <2 x i64> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testi64a + +! CHECK-LABEL: @vec_xld2_testf32a +subroutine vec_xld2_testf32a(arg1, arg2, res) + integer(2) :: arg1 + vector(real(4)) :: arg2(5) + vector(real(4)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! 
FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<4xf32> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testf32a + +! CHECK-LABEL: @vec_xld2_testf64a +subroutine vec_xld2_testf64a(arg1, arg2, res) + integer(8) :: arg1 + vector(real(8)) :: arg2(4) + vector(real(8)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]]) +! LLVMIR: store <2 x double> %[[ld]], ptr %2, align 16 +end subroutine vec_xld2_testf64a + +!------------------- +! vec_xlw4 +!------------------- + +! 
CHECK-LABEL: @vec_xlw4_testi8a +subroutine vec_xlw4_testi8a(arg1, arg2, res) + integer(1) :: arg1 + vector(integer(1)) :: arg2(2, 11, 37) + vector(integer(1)) :: res + res = vec_xlw4(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<16xi8> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <16 x i8> +! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16 +end subroutine vec_xlw4_testi8a + +! CHECK-LABEL: @vec_xlw4_testi16a +subroutine vec_xlw4_testi16a(arg1, arg2, res) + integer(2) :: arg1 + vector(integer(2)) :: arg2(2, 8) + vector(integer(2)) :: res + res = vec_xlw4(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<8xi16> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]]) +! 
LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <8 x i16> +! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16 +end subroutine vec_xlw4_testi16a + +! CHECK-LABEL: @vec_xlw4_testu32a +subroutine vec_xlw4_testu32a(arg1, arg2, res) + integer(4) :: arg1 + vector(unsigned(4)) :: arg2(8, 4) + vector(unsigned(4)) :: res + res = vec_xlw4(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:ui32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[ld]], ptr %2, align 16 +end subroutine vec_xlw4_testu32a + +! CHECK-LABEL: @vec_xlw4_testf32a +subroutine vec_xlw4_testf32a(arg1, arg2, res) + integer(2) :: arg1 + vector(real(4)) :: arg2 + vector(real(4)) :: res + res = vec_xlw4(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]]) +! 
LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_xlw4_testf32a Index: flang/test/Lower/PowerPC/ppc-vec-load-pwr10.f90 =================================================================== --- /dev/null +++ flang/test/Lower/PowerPC/ppc-vec-load-pwr10.f90 @@ -0,0 +1,370 @@ +! RUN: %flang_fc1 -target-cpu pwr10 -emit-fir %s -o - | FileCheck --check-prefixes="FIR" %s +! RUN: %flang_fc1 -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR-P10" %s +! REQUIRES: target=powerpc{{.*}} + +!---------------------- +! vec_lxvp +!---------------------- + +! CHECK-LABEL: @vec_lxvp_test_i2_ +subroutine vec_lxvp_test_i2(v1, offset, vp) + implicit none + integer(2) :: offset + vector(integer(2)) :: v1 + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_i2 + +! CHECK-LABEL: @vec_lxvp_test_i4_ +subroutine vec_lxvp_test_i4(v1, offset, vp) + implicit none + integer(2) :: offset + vector(integer(4)) :: v1 + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! 
FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_i4 + +! CHECK-LABEL: @vec_lxvp_test_u2_ +subroutine vec_lxvp_test_u2(v1, offset, vp) + implicit none + integer(2) :: offset + vector(unsigned(2)) :: v1 + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_u2 + +! CHECK-LABEL: @vec_lxvp_test_u4_ +subroutine vec_lxvp_test_u4(v1, offset, vp) + implicit none + integer(2) :: offset + vector(unsigned(4)) :: v1 + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! 
LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_u4 + +! CHECK-LABEL: @vec_lxvp_test_r4_ +subroutine vec_lxvp_test_r4(v1, offset, vp) + implicit none + integer(2) :: offset + vector(real(4)) :: v1 + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_r4 + +! CHECK-LABEL: @vec_lxvp_test_r8_ +subroutine vec_lxvp_test_r8(v1, offset, vp) + implicit none + integer(2) :: offset + vector(real(8)) :: v1 + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_r8 + +! 
CHECK-LABEL: @vec_lxvp_test_vp_ +subroutine vec_lxvp_test_vp(v1, offset, vp) + implicit none + integer(2) :: offset + __vector_pair :: v1 + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_vp + +! CHECK-LABEL: @vec_lxvp_test_i2_arr_ +subroutine vec_lxvp_test_i2_arr(v1, offset, vp) + implicit none + integer :: offset + vector(integer(2)) :: v1(10) + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i32, ptr %1, align 4 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_i2_arr + +! CHECK-LABEL: @vec_lxvp_test_i4_arr_ +subroutine vec_lxvp_test_i4_arr(v1, offset, vp) + implicit none + integer :: offset + vector(integer(4)) :: v1(10) + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! 
FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i32, ptr %1, align 4 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_i4_arr + +! CHECK-LABEL: @vec_lxvp_test_u2_arr_ +subroutine vec_lxvp_test_u2_arr(v1, offset, vp) + implicit none + integer :: offset + vector(unsigned(2)) :: v1(10) + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i32, ptr %1, align 4 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_u2_arr + +! CHECK-LABEL: @vec_lxvp_test_u4_arr_ +subroutine vec_lxvp_test_u4_arr(v1, offset, vp) + implicit none + integer :: offset + vector(unsigned(4)) :: v1(10) + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i32) -> !fir.ref> +! 
FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i32, ptr %1, align 4 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_u4_arr + +! CHECK-LABEL: @vec_lxvp_test_r4_arr_ +subroutine vec_lxvp_test_r4_arr(v1, offset, vp) + implicit none + integer :: offset + vector(real(4)) :: v1(10) + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i32, ptr %1, align 4 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_r4_arr + +! CHECK-LABEL: @vec_lxvp_test_r8_arr_ +subroutine vec_lxvp_test_r8_arr(v1, offset, vp) + implicit none + integer :: offset + vector(real(8)) :: v1(10) + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i32, ptr %1, align 4 +! 
LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_r8_arr + +! CHECK-LABEL: @vec_lxvp_test_vp_arr_ +subroutine vec_lxvp_test_vp_arr(v1, offset, vp) + implicit none + integer(8) :: offset + __vector_pair :: v1(10) + __vector_pair :: vp + vp = vec_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i64, ptr %1, align 8 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i64 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vec_lxvp_test_vp_arr + +!---------------------- +! vsx_lxvp +!---------------------- +! CHECK-LABEL: @vsx_lxvp_test_i4_ +subroutine vsx_lxvp_test_i4(v1, offset, vp) + implicit none + integer(2) :: offset + vector(integer(4)) :: v1 + __vector_pair :: vp + vp = vsx_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! 
LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vsx_lxvp_test_i4 + +! CHECK-LABEL: @vsx_lxvp_test_r8_ +subroutine vsx_lxvp_test_r8(v1, offset, vp) + implicit none + integer(2) :: offset + vector(real(8)) :: v1 + __vector_pair :: vp + vp = vsx_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i16, ptr %1, align 2 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vsx_lxvp_test_r8 + +! CHECK-LABEL: @vsx_lxvp_test_i2_arr_ +subroutine vsx_lxvp_test_i2_arr(v1, offset, vp) + implicit none + integer :: offset + vector(integer(2)) :: v1(10) + __vector_pair :: vp + vp = vsx_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i32, ptr %1, align 4 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vsx_lxvp_test_i2_arr + +! 
CHECK-LABEL: @vsx_lxvp_test_vp_arr_ +subroutine vsx_lxvp_test_vp_arr(v1, offset, vp) + implicit none + integer(8) :: offset + __vector_pair :: v1(10) + __vector_pair :: vp + vp = vsx_lxvp(offset, v1) + +! FIR: %[[offset:.*]] = fir.load %arg1 : !fir.ref +! FIR: %[[v1:.*]] = fir.convert %arg0 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[v1]], %[[offset]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.lxvp(%[[addr]]) fastmath : (!fir.ref>) -> !fir.vector<256:i1> +! FIR: fir.store %[[call]] to %arg2 : !fir.ref> + +! LLVMIR-P10: %[[offset:.*]] = load i64, ptr %1, align 8 +! LLVMIR-P10: %[[addr:.*]] = getelementptr i8, ptr %0, i64 %[[offset]] +! LLVMIR-P10: %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]]) +! LLVMIR-P10: store <256 x i1> %[[call]], ptr %2, align 32 +end subroutine vsx_lxvp_test_vp_arr Index: flang/test/Lower/PowerPC/ppc-vec-load.f90 =================================================================== --- /dev/null +++ flang/test/Lower/PowerPC/ppc-vec-load.f90 @@ -0,0 +1,655 @@ +! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="FIR" %s +! RUN: %flang -emit-llvm -S %s -o - | FileCheck --check-prefixes="LLVMIR" %s +! REQUIRES: target=powerpc{{.*}} + +!---------------------- +! vec_ld +!---------------------- + +! CHECK-LABEL: @vec_ld_testi8 +subroutine vec_ld_testi8(arg1, arg2, res) + integer(1) :: arg1 + vector(integer(1)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<16xi8> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8> +! 
FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %{{.*}}, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <16 x i8> +! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16 +end subroutine vec_ld_testi8 + +! CHECK-LABEL: @vec_ld_testi16 +subroutine vec_ld_testi16(arg1, arg2, res) + integer(2) :: arg1 + vector(integer(2)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<8xi16> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <8 x i16> +! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16 +end subroutine vec_ld_testi16 + +! CHECK-LABEL: @vec_ld_testi32 +subroutine vec_ld_testi32(arg1, arg2, res) + integer(4) :: arg1 + vector(integer(4)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! 
FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[bc:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16 +end subroutine vec_ld_testi32 + +! CHECK-LABEL: @vec_ld_testf32 +subroutine vec_ld_testf32(arg1, arg2, res) + integer(8) :: arg1 + vector(real(4)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg1i32:.*]] = fir.convert %[[arg1]] : (i64) -> i32 +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1i32]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[arg1i32:.*]] = trunc i64 %[[arg1]] to i32 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1i32]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_ld_testf32 + +! CHECK-LABEL: @vec_ld_testu32 +subroutine vec_ld_testu32(arg1, arg2, res) + integer(1) :: arg1 + vector(unsigned(4)) :: arg2, res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! 
FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:ui32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 +end subroutine vec_ld_testu32 + +! CHECK-LABEL: @vec_ld_testi32a +subroutine vec_ld_testi32a(arg1, arg2, res) + integer(4) :: arg1 + integer(4) :: arg2(10) + vector(integer(4)) :: res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 +end subroutine vec_ld_testi32a + +! CHECK-LABEL: @vec_ld_testf32av +subroutine vec_ld_testf32av(arg1, arg2, res) + integer(8) :: arg1 + vector(real(4)) :: arg2(2, 4, 8) + vector(real(4)) :: res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg1i32:.*]] = fir.convert %[[arg1]] : (i64) -> i32 +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1i32]] : (!fir.ref>, i32) -> !fir.ref> +! 
FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[arg1i32:.*]] = trunc i64 %[[arg1]] to i32 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1i32]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_ld_testf32av + +! CHECK-LABEL: @vec_ld_testi32s +subroutine vec_ld_testi32s(arg1, arg2, res) + integer(4) :: arg1 + real(4) :: arg2 + vector(real(4)) :: res + res = vec_ld(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_ld_testi32s + +!---------------------- +! vec_lde +!---------------------- + +! 
CHECK-LABEL: @vec_lde_testi8s +subroutine vec_lde_testi8s(arg1, arg2, res) + integer(1) :: arg1 + integer(1) :: arg2 + vector(integer(1)) :: res + res = vec_lde(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvebx(%[[addr]]) fastmath : (!fir.ref>) -> vector<16xi8> +! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<16xi8>) -> !fir.vector<16:i8> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <16 x i8> @llvm.ppc.altivec.lvebx(ptr %[[addr]]) +! LLVMIR: store <16 x i8> %[[call]], ptr %2, align 16 +end subroutine vec_lde_testi8s + +! CHECK-LABEL: @vec_lde_testi16a +subroutine vec_lde_testi16a(arg1, arg2, res) + integer(2) :: arg1 + integer(2) :: arg2(2, 4, 8) + vector(integer(2)) :: res + res = vec_lde(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvehx(%[[addr]]) fastmath : (!fir.ref>) -> vector<8xi16> +! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <8 x i16> @llvm.ppc.altivec.lvehx(ptr %[[addr]]) +! LLVMIR: store <8 x i16> %[[call]], ptr %2, align 16 +end subroutine vec_lde_testi16a + +! 
CHECK-LABEL: @vec_lde_testi32a +subroutine vec_lde_testi32a(arg1, arg2, res) + integer(4) :: arg1 + integer(4) :: arg2(4) + vector(integer(4)) :: res + res = vec_lde(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvewx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvewx(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 +end subroutine vec_lde_testi32a + +! CHECK-LABEL: @vec_lde_testf32a +subroutine vec_lde_testf32a(arg1, arg2, res) + integer(8) :: arg1 + real(4) :: arg2(4) + vector(real(4)) :: res + res = vec_lde(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvewx(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvewx(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float> +! 
LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_lde_testf32a + +!---------------------- +! vec_ldl +!---------------------- + +! CHECK-LABEL: @vec_ldl_testi8 +subroutine vec_ldl_testi8(arg1, arg2, res) + integer(1) :: arg1 + vector(integer(1)) :: arg2, res + res = vec_ldl(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<16xi8> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %{{.*}}, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <16 x i8> +! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16 +end subroutine vec_ldl_testi8 + +! CHECK-LABEL: @vec_ldl_testi16 +subroutine vec_ldl_testi16(arg1, arg2, res) + integer(2) :: arg1 + vector(integer(2)) :: arg2, res + res = vec_ldl(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<8xi16> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! 
LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <8 x i16> +! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16 +end subroutine vec_ldl_testi16 + +! CHECK-LABEL: @vec_ldl_testi32 +subroutine vec_ldl_testi32(arg1, arg2, res) + integer(4) :: arg1 + vector(integer(4)) :: arg2, res + res = vec_ldl(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[bc:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16 +end subroutine vec_ldl_testi32 + +! CHECK-LABEL: @vec_ldl_testf32 +subroutine vec_ldl_testf32(arg1, arg2, res) + integer(8) :: arg1 + vector(real(4)) :: arg2, res + res = vec_ldl(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! 
LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_ldl_testf32 + +! CHECK-LABEL: @vec_ldl_testu32 +subroutine vec_ldl_testu32(arg1, arg2, res) + integer(1) :: arg1 + vector(unsigned(4)) :: arg2, res + res = vec_ldl(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:ui32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 +end subroutine vec_ldl_testu32 + +! CHECK-LABEL: @vec_ldl_testi32a +subroutine vec_ldl_testi32a(arg1, arg2, res) + integer(4) :: arg1 + integer(4) :: arg2(10) + vector(integer(4)) :: res + res = vec_ldl(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! 
LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 +end subroutine vec_ldl_testi32a + +! CHECK-LABEL: @vec_ldl_testf32av +subroutine vec_ldl_testf32av(arg1, arg2, res) + integer(8) :: arg1 + vector(real(4)) :: arg2(2, 4, 8) + vector(real(4)) :: res + res = vec_ldl(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_ldl_testf32av + +! CHECK-LABEL: @vec_ldl_testi32s +subroutine vec_ldl_testi32s(arg1, arg2, res) + integer(4) :: arg1 + real(4) :: arg2 + vector(real(4)) :: res + res = vec_ldl(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32> +! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref> + +! 
LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_ldl_testi32s + +!---------------------- +! vec_xld2 +!---------------------- + +! CHECK-LABEL: @vec_xld2_testi8a +subroutine vec_xld2_testi8a(arg1, arg2, res) + integer(1) :: arg1 + vector(integer(1)) :: arg2(4) + vector(integer(1)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<16xi8> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <16 x i8> +! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testi8a + +! CHECK-LABEL: @vec_xld2_testi16 +subroutine vec_xld2_testi16(arg1, arg2, res) + integer :: arg1 + vector(integer(2)) :: arg2 + vector(integer(2)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! 
FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<8xi16> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <8 x i16> +! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testi16 + +! CHECK-LABEL: @vec_xld2_testi32a +subroutine vec_xld2_testi32a(arg1, arg2, res) + integer(4) :: arg1 + vector(integer(4)) :: arg2(41) + vector(integer(4)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<4xi32> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xi32>) -> !fir.vector<4:i32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <4 x i32> +! LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testi32a + +! CHECK-LABEL: @vec_xld2_testi64a +subroutine vec_xld2_testi64a(arg1, arg2, res) + integer(8) :: arg1 + vector(integer(8)) :: arg2(4) + vector(integer(8)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! 
FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<2xi64> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xi64>) -> !fir.vector<2:i64> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <2 x i64> +! LLVMIR: store <2 x i64> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testi64a + +! CHECK-LABEL: @vec_xld2_testf32a +subroutine vec_xld2_testf32a(arg1, arg2, res) + integer(2) :: arg1 + vector(real(4)) :: arg2(4) + vector(real(4)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<4xf32> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]]) +! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <4 x float> +! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 +end subroutine vec_xld2_testf32a + +! 
CHECK-LABEL: @vec_xld2_testf64a +subroutine vec_xld2_testf64a(arg1, arg2, res) + integer(8) :: arg1 + vector(real(8)) :: arg2(4) + vector(real(8)) :: res + res = vec_xld2(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i64) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath : (!fir.ref>) -> vector<2xf64> +! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]]) +! LLVMIR: store <2 x double> %[[ld]], ptr %2, align 16 +end subroutine vec_xld2_testf64a + +!---------------------- +! vec_xlw4 +!---------------------- + +! CHECK-LABEL: @vec_xlw4_testi8a +subroutine vec_xlw4_testi8a(arg1, arg2, res) + integer(1) :: arg1 + vector(integer(1)) :: arg2(2, 4, 8) + vector(integer(1)) :: res + res = vec_xlw4(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i8) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<16xi8> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]]) +! LLVMIR: %[[res:.*]] = bitcast <4 x i32> %[[ld]] to <16 x i8> +! 
LLVMIR: store <16 x i8> %[[res]], ptr %2, align 16 +end subroutine vec_xlw4_testi8a + +! CHECK-LABEL: @vec_xlw4_testi16a +subroutine vec_xlw4_testi16a(arg1, arg2, res) + integer(2) :: arg1 + vector(integer(2)) :: arg2(2, 4, 8) + vector(integer(2)) :: res + res = vec_xlw4(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<8xi16> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]]) +! LLVMIR: %[[res:.*]] = bitcast <4 x i32> %[[ld]] to <8 x i16> +! LLVMIR: store <8 x i16> %[[res]], ptr %2, align 16 +end subroutine vec_xlw4_testi16a + +! CHECK-LABEL: @vec_xlw4_testu32a +subroutine vec_xlw4_testu32a(arg1, arg2, res) + integer(4) :: arg1 + vector(unsigned(4)) :: arg2(2, 4, 8) + vector(unsigned(4)) :: res + res = vec_xlw4(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i32) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:ui32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]] +! 
LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]]) +! LLVMIR: store <4 x i32> %[[ld]], ptr %2, align 16 +end subroutine vec_xlw4_testu32a + +! CHECK-LABEL: @vec_xlw4_testf32a +subroutine vec_xlw4_testf32a(arg1, arg2, res) + integer(2) :: arg1 + vector(real(4)) :: arg2(4) + vector(real(4)) :: res + res = vec_xlw4(arg1, arg2) + +! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref +! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref>>) -> !fir.ref> +! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref>, i16) -> !fir.ref> +! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath : (!fir.ref>) -> vector<4xi32> +! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32> +! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32> +! FIR: fir.store %[[res]] to %arg2 : !fir.ref> + +! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2 +! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]] +! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]]) +! LLVMIR: %[[res:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float> +! LLVMIR: store <4 x float> %[[res]], ptr %2, align 16 +end subroutine vec_xlw4_testf32a