Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -5402,6 +5402,7 @@ defm logical_abbreviations : OptInFC1FFlag<"logical-abbreviations", "Enable logical abbreviations">; defm implicit_none : OptInFC1FFlag<"implicit-none", "No implicit typing allowed unless overridden by IMPLICIT statements">; defm underscoring : OptInFC1FFlag<"underscoring", "Appends one trailing underscore to external names">; +defm ppc_native_vec_elem_order: OptInFC1FFlag<"ppc-native-vector-element-order", "Specifies PowerPC vector element order">; def fno_automatic : Flag<["-"], "fno-automatic">, Group, HelpText<"Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE">; Index: clang/lib/Driver/ToolChains/Flang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Flang.cpp +++ clang/lib/Driver/ToolChains/Flang.cpp @@ -142,6 +142,8 @@ CmdArgs.push_back("-flang-experimental-hlfir"); if (Args.hasArg(options::OPT_flang_experimental_polymorphism)) CmdArgs.push_back("-flang-experimental-polymorphism"); + if (Args.hasArg(options::OPT_fno_ppc_native_vec_elem_order)) + CmdArgs.push_back("-fno-ppc-native-vector-element-order"); if (shouldLoopVersion(Args)) CmdArgs.push_back("-fversion-loops-for-stride"); } Index: flang/include/flang/Lower/CustomIntrinsicCall.h =================================================================== --- flang/include/flang/Lower/CustomIntrinsicCall.h +++ flang/include/flang/Lower/CustomIntrinsicCall.h @@ -103,11 +103,12 @@ /// Generate the FIR+MLIR operations for the generic intrinsic \p name /// with argument \p args and expected result type \p resultType. /// Returned fir::ExtendedValue is the returned Fortran intrinsic value. -fir::ExtendedValue genIntrinsicCall(fir::FirOpBuilder &builder, - mlir::Location loc, llvm::StringRef name, - std::optional resultType, - llvm::ArrayRef args, - StatementContext &stmtCtx); +fir::ExtendedValue +genIntrinsicCall(fir::FirOpBuilder &builder, mlir::Location loc, + llvm::StringRef name, std::optional resultType, + llvm::ArrayRef args, + StatementContext &stmtCtx, + Fortran::lower::AbstractConverter *converter = nullptr); } // namespace lower } // namespace Fortran Index: flang/include/flang/Lower/LoweringOptions.def =================================================================== --- flang/include/flang/Lower/LoweringOptions.def +++ flang/include/flang/Lower/LoweringOptions.def @@ -31,5 +31,8 @@ /// Off by default until fully ready. ENUM_LOWERINGOPT(LowerToHighLevelFIR, unsigned, 1, 0) +/// If true, reverse PowerPC native vector element order. +ENUM_LOWERINGOPT(NoPPCNativeVecElemOrder, unsigned, 1, 0) + #undef LOWERINGOPT #undef ENUM_LOWERINGOPT Index: flang/include/flang/Optimizer/Builder/IntrinsicCall.h =================================================================== --- flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -9,6 +9,7 @@ #ifndef FORTRAN_LOWER_INTRINSICCALL_H #define FORTRAN_LOWER_INTRINSICCALL_H +#include "flang/Lower/AbstractConverter.h" #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/Runtime/Character.h" @@ -34,7 +35,8 @@ std::pair genIntrinsicCall(fir::FirOpBuilder &, mlir::Location, llvm::StringRef name, std::optional resultType, - llvm::ArrayRef args); + llvm::ArrayRef args, + Fortran::lower::AbstractConverter *converter = nullptr); /// Enums used to templatize and share lowering of MIN and MAX. enum class Extremum { Min, Max }; @@ -124,8 +126,10 @@ struct IntrinsicLibrary { // Constructors. - explicit IntrinsicLibrary(fir::FirOpBuilder &builder, mlir::Location loc) - : builder{builder}, loc{loc} {} + explicit IntrinsicLibrary( + fir::FirOpBuilder &builder, mlir::Location loc, + Fortran::lower::AbstractConverter *converter = nullptr) + : builder{builder}, loc{loc}, converter{converter} {} IntrinsicLibrary() = delete; IntrinsicLibrary(const IntrinsicLibrary &) = delete; @@ -416,6 +420,7 @@ fir::FirOpBuilder &builder; mlir::Location loc; bool resultMustBeFreed = false; + Fortran::lower::AbstractConverter *converter = nullptr; }; struct IntrinsicDummyArgument { Index: flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h =================================================================== --- flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h +++ flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h @@ -120,6 +120,11 @@ PPCIntrinsicLibrary() = delete; PPCIntrinsicLibrary(const PPCIntrinsicLibrary &) = delete; + // Helper functions for vector element ordering. + bool isBEVecElemOrderOnLE(); + bool isNativeVecElemOrderOnLE(); + bool changeVecElemOrder(); + // PPC intrinsic handlers. template void genMtfsf(llvm::ArrayRef); Index: flang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- flang/lib/Frontend/CompilerInvocation.cpp +++ flang/lib/Frontend/CompilerInvocation.cpp @@ -942,6 +942,11 @@ res.loweringOpts.setPolymorphicTypeImpl(true); } + // -fno-ppc-native-vector-element-order + if (args.hasArg(clang::driver::options::OPT_fno_ppc_native_vec_elem_order)) { + res.loweringOpts.setNoPPCNativeVecElemOrder(true); + } + success &= parseFrontendArgs(res.getFrontendOpts(), args, diags); parseTargetArgs(res.getTargetOpts(), args); parsePreprocessorArgs(res.getPreprocessorOpts(), args); Index: flang/lib/Lower/ConvertExpr.cpp =================================================================== --- flang/lib/Lower/ConvertExpr.cpp +++ flang/lib/Lower/ConvertExpr.cpp @@ -1928,7 +1928,7 @@ } // Let the intrinsic library lower the intrinsic procedure call return Fortran::lower::genIntrinsicCall(builder, getLoc(), name, resultType, - operands, stmtCtx); + operands, stmtCtx, &converter); } /// helper to detect statement functions Index: flang/lib/Lower/CustomIntrinsicCall.cpp =================================================================== --- flang/lib/Lower/CustomIntrinsicCall.cpp +++ flang/lib/Lower/CustomIntrinsicCall.cpp @@ -98,9 +98,10 @@ llvm::StringRef name, std::optional resultType, llvm::ArrayRef args, - Fortran::lower::StatementContext &stmtCtx) { + Fortran::lower::StatementContext &stmtCtx, + Fortran::lower::AbstractConverter *converter) { auto [result, mustBeFreed] = - fir::genIntrinsicCall(builder, loc, name, resultType, args); + fir::genIntrinsicCall(builder, loc, name, resultType, args, converter); if (mustBeFreed) { mlir::Value addr = fir::getBase(result); if (auto *box = result.getBoxOf()) Index: flang/lib/Optimizer/Builder/IntrinsicCall.cpp =================================================================== --- flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -5767,9 +5767,10 @@ std::pair genIntrinsicCall(fir::FirOpBuilder &builder, mlir::Location loc, llvm::StringRef name, std::optional resultType, - llvm::ArrayRef args) { - return IntrinsicLibrary{builder, loc}.genIntrinsicCall(name, resultType, - args); + llvm::ArrayRef args, + Fortran::lower::AbstractConverter *converter) { + return IntrinsicLibrary{builder, loc, converter}.genIntrinsicCall( + name, resultType, args); } mlir::Value genMax(fir::FirOpBuilder &builder, mlir::Location loc, Index: flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp =================================================================== --- flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp +++ flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp @@ -326,6 +326,20 @@ return ppcMathOps.equal_range(name); } +// Helper functions for vector element ordering. +bool PPCIntrinsicLibrary::isBEVecElemOrderOnLE() { + return (Fortran::evaluate::isHostLittleEndian && + converter->getLoweringOptions().getNoPPCNativeVecElemOrder()); +} +bool PPCIntrinsicLibrary::isNativeVecElemOrderOnLE() { + return (Fortran::evaluate::isHostLittleEndian && + !converter->getLoweringOptions().getNoPPCNativeVecElemOrder()); +} +bool PPCIntrinsicLibrary::changeVecElemOrder() { + return (Fortran::evaluate::isHostLittleEndian != + converter->getLoweringOptions().getNoPPCNativeVecElemOrder()); +} + //===----------------------------------------------------------------------===// // PowerPC specific intrinsic handlers. //===----------------------------------------------------------------------===// @@ -883,8 +897,8 @@ mlir::Value newArgs[]{vArg1}; if (vecTyInfo.isFloat32()) { - // TODO: Handle element ordering - newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]); + if (changeVecElemOrder()) + newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]); const llvm::StringRef fname{"llvm.ppc.vsx.xvcvspdp"}; auto ftype{ @@ -905,8 +919,8 @@ auto mvf32Ty{mlir::VectorType::get(4, f32type)}; newArgs[0] = builder.createConvert(loc, mvf32Ty, newArgs[0]); - // TODO: Handle element ordering - newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]); + if (changeVecElemOrder()) + newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]); return builder.createConvert(loc, fvf32Ty, newArgs[0]); } Index: flang/test/Driver/driver-help-hidden.f90 =================================================================== --- flang/test/Driver/driver-help-hidden.f90 +++ flang/test/Driver/driver-help-hidden.f90 @@ -59,6 +59,8 @@ ! CHECK-NEXT: Set OpenMP version (e.g. 45 for OpenMP 4.5, 50 for OpenMP 5.0). Default value is 50 for Clang and 11 for Flang ! CHECK-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! CHECK-NEXT: -fpass-plugin= Load pass plugin from a dynamic shared object file (only with new pass manager). +! CHECK-NEXT: -fppc-native-vector-element-order +! CHECK-NEXT: Specifies PowerPC vector element order ! CHECK-NEXT: -freciprocal-math Allow division operations to be reassociated ! CHECK-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size ! CHECK-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages Index: flang/test/Driver/driver-help.f90 =================================================================== --- flang/test/Driver/driver-help.f90 +++ flang/test/Driver/driver-help.f90 @@ -55,6 +55,8 @@ ! HELP-NEXT: Set OpenMP version (e.g. 45 for OpenMP 4.5, 50 for OpenMP 5.0). Default value is 50 for Clang and 11 for Flang ! HELP-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! HELP-NEXT: -fpass-plugin= Load pass plugin from a dynamic shared object file (only with new pass manager). +! HELP-NEXT: -fppc-native-vector-element-order +! HELP-NEXT: Specifies PowerPC vector element order ! HELP-NEXT: -freciprocal-math Allow division operations to be reassociated ! HELP-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size ! HELP-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages @@ -165,6 +167,8 @@ ! HELP-FC1-NEXT: Set OpenMP version (e.g. 45 for OpenMP 4.5, 50 for OpenMP 5.0). Default value is 50 for Clang and 11 for Flang ! HELP-FC1-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! HELP-FC1-NEXT: -fpass-plugin= Load pass plugin from a dynamic shared object file (only with new pass manager). +! HELP-FC1-NEXT: -fppc-native-vector-element-order +! HELP-FC1-NEXT: Specifies PowerPC vector element order ! HELP-FC1-NEXT: -freciprocal-math Allow division operations to be reassociated ! HELP-FC1-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size ! HELP-FC1-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages Index: flang/test/Lower/PowerPC/ppc-vec_cvf-elem-order.f90 =================================================================== --- /dev/null +++ flang/test/Lower/PowerPC/ppc-vec_cvf-elem-order.f90 @@ -0,0 +1,37 @@ +! RUN: bbc -emit-fir %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="CHECK-FIR" %s +! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="CHECK" %s +! REQUIRES: target=powerpc{{.*}} + +! CHECK-LABEL: vec_cvf_test_r4r8 +subroutine vec_cvf_test_r4r8(arg1) + vector(real(8)), intent(in) :: arg1 + vector(real(4)) :: r + r = vec_cvf(arg1) + +! CHECK-FIR: %[[arg:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg:.*]] = fir.convert %[[arg]] : (!fir.vector<2:f64>) -> vector<2xf64> +! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.xvcvdpsp(%[[carg]]) fastmath : (vector<2xf64>) -> !fir.vector<4:f32> +! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[call]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[r:.*]] = fir.convert %[[ccall]] : (vector<4xf32>) -> !fir.vector<4:f32> +! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref> + +! CHECK: %[[arg:.*]] = load <2 x double>, ptr %{{.*}}, align 16 +! CHECK: %[[call:.*]] = call contract <4 x float> @llvm.ppc.vsx.xvcvdpsp(<2 x double> %[[arg]]) +! CHECK: store <4 x float> %[[call]], ptr %{{.*}}, align 16 +end subroutine vec_cvf_test_r4r8 + +! CHECK-LABEL: vec_cvf_test_r8r4 +subroutine vec_cvf_test_r8r4(arg1) + vector(real(4)), intent(in) :: arg1 + vector(real(8)) :: r + r = vec_cvf(arg1) + +! CHECK-FIR: %[[arg:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK-FIR: %[[carg:.*]] = fir.convert %[[arg]] : (!fir.vector<4:f32>) -> vector<4xf32> +! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.xvcvspdp(%[[carg]]) fastmath : (vector<4xf32>) -> !fir.vector<2:f64> +! CHECK-FIR: fir.store %[[call]] to %{{.*}} : !fir.ref> + +! CHECK: %[[arg:.*]] = load <4 x float>, ptr %{{.*}}, align 16 +! CHECK: %[[r:.*]] = call contract <2 x double> @llvm.ppc.vsx.xvcvspdp(<4 x float> %[[arg]]) +! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16 +end subroutine vec_cvf_test_r8r4 Index: flang/tools/bbc/bbc.cpp =================================================================== --- flang/tools/bbc/bbc.cpp +++ flang/tools/bbc/bbc.cpp @@ -186,6 +186,11 @@ llvm::cl::desc("enable polymorphic type lowering (experimental)"), llvm::cl::init(false)); +static llvm::cl::opt enableNoPPCNativeVecElemOrder( + "fno-ppc-native-vector-element-order", + llvm::cl::desc("no PowerPC native vector element order."), + llvm::cl::init(false)); + static llvm::cl::opt useHLFIR("hlfir", llvm::cl::desc("Lower to high level FIR"), llvm::cl::init(false)); @@ -285,6 +290,7 @@ // Use default lowering options for bbc. Fortran::lower::LoweringOptions loweringOptions{}; loweringOptions.setPolymorphicTypeImpl(enablePolymorphic); + loweringOptions.setNoPPCNativeVecElemOrder(enableNoPPCNativeVecElemOrder); loweringOptions.setLowerToHighLevelFIR(useHLFIR || emitHLFIR); auto burnside = Fortran::lower::LoweringBridge::create( ctx, semanticsContext, defKinds, semanticsContext.intrinsics(),