Add -f[no-]ppc-native-vector-element-order for flang.

- Driver: declare the option pair and forward both spellings to -fc1.
- Frontend / bbc: map the option onto the NoPPCNativeVecElemOrder lowering
  option. In the frontend the last spelling on the command line wins.
- Lowering: thread an optional AbstractConverter into IntrinsicLibrary so the
  PowerPC helpers can read the lowering options; vec_cvf swaps vector word
  pairs only when changeVecElemOrder() is true.

NOTE(review): several angle-bracketed spans (template arguments, TableGen
PosFlag/NegFlag bodies, FIR types, help-text placeholders) appear to be
missing from this copy of the patch. They are reproduced as found and must be
restored before the patch is applied. Indentation of context lines was
reconstructed, so apply with whitespace-insensitive matching.

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5416,6 +5416,9 @@
 defm logical_abbreviations : OptInFC1FFlag<"logical-abbreviations", "Enable logical abbreviations">;
 defm implicit_none : OptInFC1FFlag<"implicit-none", "No implicit typing allowed unless overridden by IMPLICIT statements">;
 defm underscoring : OptInFC1FFlag<"underscoring", "Appends one trailing underscore to external names">;
+defm ppc_native_vec_elem_order: BoolOptionWithoutMarshalling<"f", "ppc-native-vector-element-order",
+  PosFlag,
+  NegFlag>;
 
 def fno_automatic : Flag<["-"], "fno-automatic">, Group,
   HelpText<"Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE">;
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -144,7 +144,9 @@
     CmdArgs.push_back("-fversion-loops-for-stride");
 
   Args.AddAllArgs(CmdArgs, {options::OPT_flang_experimental_hlfir,
-                            options::OPT_flang_experimental_polymorphism});
+                            options::OPT_flang_experimental_polymorphism,
+                            options::OPT_fno_ppc_native_vec_elem_order,
+                            options::OPT_fppc_native_vec_elem_order});
 }
 
 void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
diff --git a/flang/include/flang/Lower/CustomIntrinsicCall.h b/flang/include/flang/Lower/CustomIntrinsicCall.h
--- a/flang/include/flang/Lower/CustomIntrinsicCall.h
+++ b/flang/include/flang/Lower/CustomIntrinsicCall.h
@@ -103,11 +103,14 @@
 /// Generate the FIR+MLIR operations for the generic intrinsic \p name
 /// with argument \p args and expected result type \p resultType.
 /// Returned fir::ExtendedValue is the returned Fortran intrinsic value.
+/// The optional \p converter is forwarded to the intrinsic library, which
+/// reads lowering options from it when it is non-null.
-fir::ExtendedValue genIntrinsicCall(fir::FirOpBuilder &builder,
-                                    mlir::Location loc, llvm::StringRef name,
-                                    std::optional resultType,
-                                    llvm::ArrayRef args,
-                                    StatementContext &stmtCtx);
+fir::ExtendedValue
+genIntrinsicCall(fir::FirOpBuilder &builder, mlir::Location loc,
+                 llvm::StringRef name, std::optional resultType,
+                 llvm::ArrayRef args,
+                 StatementContext &stmtCtx,
+                 Fortran::lower::AbstractConverter *converter = nullptr);
 
 } // namespace lower
 } // namespace Fortran
diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def
--- a/flang/include/flang/Lower/LoweringOptions.def
+++ b/flang/include/flang/Lower/LoweringOptions.def
@@ -31,5 +31,8 @@
 /// Off by default until fully ready.
 ENUM_LOWERINGOPT(LowerToHighLevelFIR, unsigned, 1, 0)
 
+/// If true, reverse PowerPC native vector element order. Off by default.
+ENUM_LOWERINGOPT(NoPPCNativeVecElemOrder, unsigned, 1, 0)
+
 #undef LOWERINGOPT
 #undef ENUM_LOWERINGOPT
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -9,6 +9,7 @@
 #ifndef FORTRAN_LOWER_INTRINSICCALL_H
 #define FORTRAN_LOWER_INTRINSICCALL_H
 
+#include "flang/Lower/AbstractConverter.h"
 #include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/Runtime/Character.h"
@@ -34,7 +35,8 @@
 std::pair
 genIntrinsicCall(fir::FirOpBuilder &, mlir::Location, llvm::StringRef name,
                  std::optional resultType,
-                 llvm::ArrayRef args);
+                 llvm::ArrayRef args,
+                 Fortran::lower::AbstractConverter *converter = nullptr);
 
 /// Enums used to templatize and share lowering of MIN and MAX.
 enum class Extremum { Min, Max };
@@ -124,8 +126,10 @@
 struct IntrinsicLibrary {
 
   // Constructors.
-  explicit IntrinsicLibrary(fir::FirOpBuilder &builder, mlir::Location loc)
-      : builder{builder}, loc{loc} {}
+  explicit IntrinsicLibrary(
+      fir::FirOpBuilder &builder, mlir::Location loc,
+      Fortran::lower::AbstractConverter *converter = nullptr)
+      : builder{builder}, loc{loc}, converter{converter} {}
   IntrinsicLibrary() = delete;
   IntrinsicLibrary(const IntrinsicLibrary &) = delete;
 
@@ -416,6 +420,8 @@
   fir::FirOpBuilder &builder;
   mlir::Location loc;
   bool resultMustBeFreed = false;
+  /// Optional lowering converter; may be null, so check before dereferencing.
+  Fortran::lower::AbstractConverter *converter = nullptr;
 };
 
 struct IntrinsicDummyArgument {
diff --git a/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h b/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
--- a/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
@@ -135,6 +135,15 @@
   PPCIntrinsicLibrary() = delete;
   PPCIntrinsicLibrary(const PPCIntrinsicLibrary &) = delete;
 
+  // Helper functions for vector element ordering.
+  // True on a little-endian host with NoPPCNativeVecElemOrder set.
+  bool isBEVecElemOrderOnLE();
+  // True on a little-endian host with NoPPCNativeVecElemOrder unset.
+  bool isNativeVecElemOrderOnLE();
+  // True when exactly one of "little-endian host" and
+  // "NoPPCNativeVecElemOrder set" holds; used to decide on element swaps.
+  bool changeVecElemOrder();
+
   // PPC MMA intrinsic generic handler
   template
   void genMmaIntr(llvm::ArrayRef);
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -954,6 +954,15 @@
     res.loweringOpts.setPolymorphicTypeImpl(true);
   }
 
+  // -f[no-]ppc-native-vector-element-order: the driver forwards both
+  // spellings, so honor whichever appears last on the command line.
+  if (!args.hasFlag(
+          clang::driver::options::OPT_fppc_native_vec_elem_order,
+          clang::driver::options::OPT_fno_ppc_native_vec_elem_order,
+          /*Default=*/true)) {
+    res.loweringOpts.setNoPPCNativeVecElemOrder(true);
+  }
+
   success &= parseFrontendArgs(res.getFrontendOpts(), args, diags);
   parseTargetArgs(res.getTargetOpts(), args);
   parsePreprocessorArgs(res.getPreprocessorOpts(), args);
diff --git a/flang/lib/Lower/ConvertExpr.cpp b/flang/lib/Lower/ConvertExpr.cpp
--- a/flang/lib/Lower/ConvertExpr.cpp
+++ b/flang/lib/Lower/ConvertExpr.cpp
@@ -1928,7 +1928,7 @@
     }
     // Let the intrinsic library lower the intrinsic procedure call
     return Fortran::lower::genIntrinsicCall(builder, getLoc(), name, resultType,
-                                            operands, stmtCtx);
+                                            operands, stmtCtx, &converter);
   }
 
   /// helper to detect statement functions
diff --git a/flang/lib/Lower/CustomIntrinsicCall.cpp b/flang/lib/Lower/CustomIntrinsicCall.cpp
--- a/flang/lib/Lower/CustomIntrinsicCall.cpp
+++ b/flang/lib/Lower/CustomIntrinsicCall.cpp
@@ -98,9 +98,10 @@
     llvm::StringRef name,
     std::optional resultType,
     llvm::ArrayRef args,
-    Fortran::lower::StatementContext &stmtCtx) {
+    Fortran::lower::StatementContext &stmtCtx,
+    Fortran::lower::AbstractConverter *converter) {
   auto [result, mustBeFreed] =
-      fir::genIntrinsicCall(builder, loc, name, resultType, args);
+      fir::genIntrinsicCall(builder, loc, name, resultType, args, converter);
   if (mustBeFreed) {
     mlir::Value addr = fir::getBase(result);
     if (auto *box = result.getBoxOf())
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -5770,9 +5770,10 @@
 std::pair
 genIntrinsicCall(fir::FirOpBuilder &builder, mlir::Location loc,
                  llvm::StringRef name, std::optional resultType,
-                 llvm::ArrayRef args) {
-  return IntrinsicLibrary{builder, loc}.genIntrinsicCall(name, resultType,
-                                                         args);
+                 llvm::ArrayRef args,
+                 Fortran::lower::AbstractConverter *converter) {
+  return IntrinsicLibrary{builder, loc, converter}.genIntrinsicCall(
+      name, resultType, args);
 }
 
 mlir::Value genMax(fir::FirOpBuilder &builder, mlir::Location loc,
diff --git a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
--- a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
@@ -360,6 +360,32 @@
   return ppcMathOps.equal_range(name);
 }
 
+// Helper functions for vector element ordering.
+//
+// The converter is optional (IntrinsicLibrary defaults it to nullptr). Without
+// one, fall back to the default lowering option, i.e. the native order.
+// NOTE(review): the helpers below test the *host* endianness
+// (Fortran::evaluate::isHostLittleEndian); confirm this is intended when
+// cross-compiling for a target of the opposite endianness.
+static bool
+noPPCNativeVecElemOrder(Fortran::lower::AbstractConverter *converter) {
+  return converter &&
+         converter->getLoweringOptions().getNoPPCNativeVecElemOrder();
+}
+
+bool PPCIntrinsicLibrary::isBEVecElemOrderOnLE() {
+  return Fortran::evaluate::isHostLittleEndian &&
+         noPPCNativeVecElemOrder(converter);
+}
+bool PPCIntrinsicLibrary::isNativeVecElemOrderOnLE() {
+  return Fortran::evaluate::isHostLittleEndian &&
+         !noPPCNativeVecElemOrder(converter);
+}
+bool PPCIntrinsicLibrary::changeVecElemOrder() {
+  return Fortran::evaluate::isHostLittleEndian !=
+         noPPCNativeVecElemOrder(converter);
+}
+
 static mlir::FunctionType genMmaVpFuncType(mlir::MLIRContext *context,
                                            int quadCnt, int pairCnt, int vecCnt,
                                            int intCnt = 0,
@@ -1014,8 +1040,8 @@
   mlir::Value newArgs[]{vArg1};
 
   if (vecTyInfo.isFloat32()) {
-    // TODO: Handle element ordering
-    newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
+    if (changeVecElemOrder())
+      newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
 
     const llvm::StringRef fname{"llvm.ppc.vsx.xvcvspdp"};
     auto ftype{
@@ -1036,8 +1062,8 @@
     auto mvf32Ty{mlir::VectorType::get(4, f32type)};
     newArgs[0] = builder.createConvert(loc, mvf32Ty, newArgs[0]);
 
-    // TODO: Handle element ordering
-    newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
+    if (changeVecElemOrder())
+      newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
 
     return builder.createConvert(loc, fvf32Ty, newArgs[0]);
   }
diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -50,6 +50,8 @@
 ! CHECK-NEXT: -fno-automatic Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE
 ! CHECK-NEXT: -fno-color-diagnostics Disable colors in diagnostics
 ! CHECK-NEXT: -fno-integrated-as Disable the integrated assembler
+! CHECK-NEXT: -fno-ppc-native-vector-element-order
+! CHECK-NEXT: Specifies PowerPC non-native vector element order
 ! CHECK-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
 ! CHECK-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
 ! CHECK-NEXT: -fno-version-loops-for-stride
@@ -63,6 +65,8 @@
 ! CHECK-NEXT: -foptimization-record-passes=
 ! CHECK-NEXT: Only include passes which match a specified regular expression in the generated optimization record (by default, include all passes)
 ! CHECK-NEXT: -fpass-plugin= Load pass plugin from a dynamic shared object file (only with new pass manager).
+! CHECK-NEXT: -fppc-native-vector-element-order
+! CHECK-NEXT: Specifies PowerPC native vector element order
 ! CHECK-NEXT: -freciprocal-math Allow division operations to be reassociated
 ! CHECK-NEXT: -fsave-optimization-record=
 ! CHECK-NEXT: Generate an optimization record file in a specific format
diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -46,6 +46,8 @@
 ! HELP-NEXT: -fno-automatic Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE
 ! HELP-NEXT: -fno-color-diagnostics Disable colors in diagnostics
 ! HELP-NEXT: -fno-integrated-as Disable the integrated assembler
+! HELP-NEXT: -fno-ppc-native-vector-element-order
+! HELP-NEXT: Specifies PowerPC non-native vector element order
 ! HELP-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
 ! HELP-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
 ! HELP-NEXT: -fno-version-loops-for-stride
@@ -59,6 +61,8 @@
 ! HELP-NEXT: -foptimization-record-passes=
 ! HELP-NEXT: Only include passes which match a specified regular expression in the generated optimization record (by default, include all passes)
 ! HELP-NEXT: -fpass-plugin= Load pass plugin from a dynamic shared object file (only with new pass manager).
+! HELP-NEXT: -fppc-native-vector-element-order
+! HELP-NEXT: Specifies PowerPC native vector element order
 ! HELP-NEXT: -freciprocal-math Allow division operations to be reassociated
 ! HELP-NEXT: -fsave-optimization-record=
 ! HELP-NEXT: Generate an optimization record file in a specific format
@@ -158,6 +162,8 @@
 ! HELP-FC1-NEXT: Do not use the analyzed objects when unparsing
 ! HELP-FC1-NEXT: -fno-automatic Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE
 ! HELP-FC1-NEXT: -fno-debug-pass-manager Disables debug printing for the new pass manager
+! HELP-FC1-NEXT: -fno-ppc-native-vector-element-order
+! HELP-FC1-NEXT: Specifies PowerPC non-native vector element order
 ! HELP-FC1-NEXT: -fno-reformat Dump the cooked character stream in -E mode
 ! HELP-FC1-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
 ! HELP-FC1-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
@@ -173,6 +179,8 @@
 ! HELP-FC1-NEXT: Set OpenMP version (e.g. 45 for OpenMP 4.5, 50 for OpenMP 5.0). Default value is 50 for Clang and 11 for Flang
 ! HELP-FC1-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code.
 ! HELP-FC1-NEXT: -fpass-plugin= Load pass plugin from a dynamic shared object file (only with new pass manager).
+! HELP-FC1-NEXT: -fppc-native-vector-element-order
+! HELP-FC1-NEXT: Specifies PowerPC native vector element order
 ! HELP-FC1-NEXT: -freciprocal-math Allow division operations to be reassociated
 ! HELP-FC1-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size
 ! HELP-FC1-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages
diff --git a/flang/test/Driver/frontend-forwarding.f90 b/flang/test/Driver/frontend-forwarding.f90
--- a/flang/test/Driver/frontend-forwarding.f90
+++ b/flang/test/Driver/frontend-forwarding.f90
@@ -18,6 +18,8 @@
 ! RUN: -fversion-loops-for-stride \
 ! RUN: -flang-experimental-polymorphism \
 ! RUN: -flang-experimental-hlfir \
+! RUN: -fno-ppc-native-vector-element-order \
+! RUN: -fppc-native-vector-element-order \
 ! RUN: -mllvm -print-before-all \
 ! RUN: -save-temps=obj \
 ! RUN: -P \
@@ -40,5 +42,7 @@
 ! CHECK: "-fversion-loops-for-stride"
 ! CHECK: "-flang-experimental-polymorphism"
 ! CHECK: "-flang-experimental-hlfir"
+! CHECK: "-fno-ppc-native-vector-element-order"
+! CHECK: "-fppc-native-vector-element-order"
 ! CHECK: "-mllvm" "-print-before-all"
 ! CHECK: "-save-temps=obj"
diff --git a/flang/test/Lower/PowerPC/ppc-vec_cvf-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec_cvf-elem-order.f90
new file mode 100644
--- /dev/null
+++ b/flang/test/Lower/PowerPC/ppc-vec_cvf-elem-order.f90
@@ -0,0 +1,37 @@
+! RUN: bbc -emit-fir %s -fno-ppc-native-vector-element-order=true -o - | FileCheck --check-prefixes="CHECK,FIR" %s
+! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="CHECK,LLVMIR" %s
+! REQUIRES: target=powerpc{{.*}}
+
+! CHECK-LABEL: vec_cvf_test_r4r8
+subroutine vec_cvf_test_r4r8(arg1)
+  vector(real(8)), intent(in) :: arg1
+  vector(real(4)) :: r
+  r = vec_cvf(arg1)
+
+! FIR: %[[arg:.*]] = fir.load %{{.*}} : !fir.ref>
+! FIR: %[[carg:.*]] = fir.convert %[[arg]] : (!fir.vector<2:f64>) -> vector<2xf64>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.xvcvdpsp(%[[carg]]) fastmath : (vector<2xf64>) -> !fir.vector<4:f32>
+! FIR: %[[ccall:.*]] = fir.convert %[[call]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! FIR: %[[r:.*]] = fir.convert %[[ccall]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[r]] to %{{.*}} : !fir.ref>
+
+! LLVMIR: %[[arg:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[call:.*]] = call contract <4 x float> @llvm.ppc.vsx.xvcvdpsp(<2 x double> %[[arg]])
+! LLVMIR: store <4 x float> %[[call]], ptr %{{.*}}, align 16
+end subroutine vec_cvf_test_r4r8
+
+! CHECK-LABEL: vec_cvf_test_r8r4
+subroutine vec_cvf_test_r8r4(arg1)
+  vector(real(4)), intent(in) :: arg1
+  vector(real(8)) :: r
+  r = vec_cvf(arg1)
+
+! FIR: %[[arg:.*]] = fir.load %{{.*}} : !fir.ref>
+! FIR: %[[carg:.*]] = fir.convert %[[arg]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.xvcvspdp(%[[carg]]) fastmath : (vector<4xf32>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[call]] to %{{.*}} : !fir.ref>
+
+! LLVMIR: %[[arg:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = call contract <2 x double> @llvm.ppc.vsx.xvcvspdp(<4 x float> %[[arg]])
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+end subroutine vec_cvf_test_r8r4
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -186,6 +186,11 @@
     llvm::cl::desc("enable polymorphic type lowering (experimental)"),
     llvm::cl::init(false));
 
+static llvm::cl::opt enableNoPPCNativeVecElemOrder(
+    "fno-ppc-native-vector-element-order",
+    llvm::cl::desc("no PowerPC native vector element order."),
+    llvm::cl::init(false));
+
 static llvm::cl::opt useHLFIR("hlfir",
                               llvm::cl::desc("Lower to high level FIR"),
                               llvm::cl::init(false));
@@ -289,6 +294,7 @@
   // Use default lowering options for bbc.
   Fortran::lower::LoweringOptions loweringOptions{};
   loweringOptions.setPolymorphicTypeImpl(enablePolymorphic);
+  loweringOptions.setNoPPCNativeVecElemOrder(enableNoPPCNativeVecElemOrder);
   loweringOptions.setLowerToHighLevelFIR(useHLFIR || emitHLFIR);
   auto burnside = Fortran::lower::LoweringBridge::create(
       ctx, semanticsContext, defKinds, semanticsContext.intrinsics(),