[flang][PowerPC] Add -f{no-}ppc-native-vector-element-order and honour it in vec_cvf

Adds the driver/fc1 flag, forwards it to lowering as an LLVM option, and makes
the vec_cvf lowering swap vector word pairs only when the target's endianness
equals the requested element-order setting. The check reads the endianness
from the module's target triple rather than from the compiler host, so that a
cross-compilation lowers the same way as a native build.

Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -5403,6 +5403,7 @@
 defm loop_versioning : BoolOptionWithoutMarshalling<"f", "version-loops-for-stride",
   PosFlag<SetTrue, [], "Create unit-strided versions of loops">,
    NegFlag<SetFalse, [], "Do not create unit-strided loops (default)">>;
+defm ppc_vec_order : OptInFC1FFlag<"ppc-native-vector-element-order", "Specifies PowerPC vector element order">;
 } // let Flags = [FC1Option, FlangOption, FlangOnlyOption]
 
 def J : JoinedOrSeparate<["-"], "J">,
Index: clang/lib/Driver/ToolChains/Flang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Flang.cpp
+++ clang/lib/Driver/ToolChains/Flang.cpp
@@ -110,12 +110,13 @@
 }
 
 void Flang::addOtherOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
-  Args.AddAllArgs(CmdArgs,
-                  {options::OPT_module_dir, options::OPT_fdebug_module_writer,
-                   options::OPT_fintrinsic_modules_path, options::OPT_pedantic,
-                   options::OPT_std_EQ, options::OPT_W_Joined,
-                   options::OPT_fconvert_EQ, options::OPT_fpass_plugin_EQ,
-                   options::OPT_funderscoring, options::OPT_fno_underscoring});
+  Args.AddAllArgs(
+      CmdArgs, {options::OPT_module_dir, options::OPT_fdebug_module_writer,
+                options::OPT_fintrinsic_modules_path, options::OPT_pedantic,
+                options::OPT_std_EQ, options::OPT_W_Joined,
+                options::OPT_fconvert_EQ, options::OPT_fpass_plugin_EQ,
+                options::OPT_funderscoring, options::OPT_fno_underscoring,
+                options::OPT_fppc_vec_order, options::OPT_fno_ppc_vec_order});
 
   llvm::codegenoptions::DebugInfoKind DebugInfoKind;
   if (Args.hasArg(options::OPT_gN_Group)) {
Index: flang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- flang/lib/Frontend/CompilerInvocation.cpp
+++ flang/lib/Frontend/CompilerInvocation.cpp
@@ -555,6 +555,12 @@
     }
   }
 
+  // -f{no-}ppc-native-vector-element-order
+  if (args.hasFlag(clang::driver::options::OPT_fno_ppc_vec_order,
+                   clang::driver::options::OPT_fppc_vec_order, false)) {
+    opts.llvmArgs.push_back("-fppc-native-vector-element-order=false");
+  }
+
   setUpFrontendBasedOnAction(opts);
   opts.dashX = dashX;
 
@@ -925,6 +931,11 @@
     res.loweringOpts.setPolymorphicTypeImpl(true);
   }
 
+  res.frontendOpts.llvmArgs =
+      args.getAllArgValues(clang::driver::options::OPT_mllvm);
+  res.frontendOpts.mlirArgs =
+      args.getAllArgValues(clang::driver::options::OPT_mmlir);
+
   success &= parseFrontendArgs(res.getFrontendOpts(), args, diags);
   parseTargetArgs(res.getTargetOpts(), args);
   parsePreprocessorArgs(res.getPreprocessorOpts(), args);
@@ -933,12 +944,6 @@
   success &= parseSemaArgs(res, args, diags);
   success &= parseDialectArgs(res, args, diags);
   success &= parseDiagArgs(res, args, diags);
-  res.frontendOpts.llvmArgs =
-      args.getAllArgValues(clang::driver::options::OPT_mllvm);
-
-  res.frontendOpts.mlirArgs =
-      args.getAllArgValues(clang::driver::options::OPT_mmlir);
-
   success &= parseFloatingPointArgs(res, args, diags);
 
   // Set the string to be used as the return value of the COMPILER_OPTIONS
Index: flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
===================================================================
--- flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
+++ flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
@@ -14,13 +14,29 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Optimizer/Builder/PPCIntrinsicCall.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/IntrinsicCall.h"
 #include "flang/Optimizer/Builder/MutableBox.h"
+#include "flang/Optimizer/Dialect/Support/FIRContext.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
+#include "llvm/Support/CommandLine.h"
+
+static llvm::cl::opt<bool>
+    nativeVecElementOrder("fppc-native-vector-element-order",
+                          llvm::cl::desc("Specifies vector element order"),
+                          llvm::cl::init(true));
 
 namespace fir {
 
+/// Return true when the vec_cvf lowering must swap vector word pairs: the
+/// target's endianness (taken from the module's triple, not from the host the
+/// compiler runs on) equals the -fppc-native-vector-element-order setting.
+static bool changeVecElemOrder(fir::FirOpBuilder &builder) {
+  const bool targetIsLE{
+      fir::getTargetTriple(builder.getModule()).isLittleEndian()};
+  return targetIsLE == nativeVecElementOrder;
+}
+
 using PI = PPCIntrinsicLibrary;
 
 // PPC specific intrinsic handlers.
@@ -792,8 +808,8 @@
 
   mlir::Value newArgs[]{vArg1};
   if (vecTyInfo.isFloat32()) {
-    // TODO: Handle element ordering
-    newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
+    if (changeVecElemOrder(builder))
+      newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
 
     const llvm::StringRef fname{"llvm.ppc.vsx.xvcvspdp"};
     auto ftype{
@@ -814,8 +830,8 @@
     auto mvf32Ty{mlir::VectorType::get(4, f32type)};
     newArgs[0] = builder.createConvert(loc, mvf32Ty, newArgs[0]);
 
-    // TODO: Handle element ordering
-    newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
+    if (changeVecElemOrder(builder))
+      newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
 
     return builder.createConvert(loc, fvf32Ty, newArgs[0]);
   }
Index: flang/test/Driver/driver-help-hidden.f90
===================================================================
--- flang/test/Driver/driver-help-hidden.f90
+++ flang/test/Driver/driver-help-hidden.f90
@@ -59,6 +59,8 @@
 ! CHECK-NEXT:                         Set OpenMP version (e.g. 45 for OpenMP 4.5, 50 for OpenMP 5.0). Default value is 50 for Clang and 11 for Flang
 ! CHECK-NEXT: -fopenmp                Parse OpenMP pragmas and generate parallel code.
 ! CHECK-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
+! CHECK-NEXT: -fppc-native-vector-element-order
+! CHECK-NEXT:                         Specifies PowerPC vector element order
 ! CHECK-NEXT: -freciprocal-math       Allow division operations to be reassociated
 ! CHECK-NEXT: -fstack-arrays          Attempt to allocate array temporaries on the stack, no matter their size
 ! CHECK-NEXT: -fsyntax-only           Run the preprocessor, parser and semantic analysis stages
Index: flang/test/Driver/driver-help.f90
===================================================================
--- flang/test/Driver/driver-help.f90
+++ flang/test/Driver/driver-help.f90
@@ -55,6 +55,8 @@
 ! HELP-NEXT:                         Set OpenMP version (e.g. 45 for OpenMP 4.5, 50 for OpenMP 5.0). Default value is 50 for Clang and 11 for Flang
 ! HELP-NEXT: -fopenmp                Parse OpenMP pragmas and generate parallel code.
 ! HELP-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
+! HELP-NEXT: -fppc-native-vector-element-order
+! HELP-NEXT:                         Specifies PowerPC vector element order
 ! HELP-NEXT: -freciprocal-math       Allow division operations to be reassociated
 ! HELP-NEXT: -fstack-arrays          Attempt to allocate array temporaries on the stack, no matter their size
 ! HELP-NEXT: -fsyntax-only           Run the preprocessor, parser and semantic analysis stages
@@ -165,6 +167,8 @@
 ! HELP-FC1-NEXT:                         Set OpenMP version (e.g. 45 for OpenMP 4.5, 50 for OpenMP 5.0). Default value is 50 for Clang and 11 for Flang
 ! HELP-FC1-NEXT: -fopenmp                Parse OpenMP pragmas and generate parallel code.
 ! HELP-FC1-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
+! HELP-FC1-NEXT: -fppc-native-vector-element-order
+! HELP-FC1-NEXT:                         Specifies PowerPC vector element order
 ! HELP-FC1-NEXT: -freciprocal-math       Allow division operations to be reassociated
 ! HELP-FC1-NEXT: -fstack-arrays          Attempt to allocate array temporaries on the stack, no matter their size
 ! HELP-FC1-NEXT: -fsyntax-only           Run the preprocessor, parser and semantic analysis stages
Index: flang/test/Lower/PowerPC/ppc-vec_cvf-elem-order.f90
===================================================================
--- /dev/null
+++ flang/test/Lower/PowerPC/ppc-vec_cvf-elem-order.f90
@@ -0,0 +1,37 @@
+! RUN: bbc -emit-fir %s -fppc-native-vector-element-order=false -o - | FileCheck --check-prefixes="CHECK-FIR" %s
+! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="CHECK" %s
+! REQUIRES: target=powerpc{{.*}}
+
+! CHECK-LABEL: vec_cvf_test_r4r8
+subroutine vec_cvf_test_r4r8(arg1)
+  vector(real(8)), intent(in) :: arg1
+  vector(real(4)) :: r
+  r = vec_cvf(arg1)
+
+! CHECK-FIR: %[[arg:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
+! CHECK-FIR: %[[carg:.*]] = fir.convert %[[arg]] : (!fir.vector<2:f64>) -> vector<2xf64>
+! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.xvcvdpsp(%[[carg]]) fastmath<contract> : (vector<2xf64>) -> !fir.vector<4:f32>
+! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[call]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! CHECK-FIR: %[[r:.*]] = fir.convert %[[ccall]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+
+! CHECK: %[[arg:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! CHECK: %[[call:.*]] = call contract <4 x float> @llvm.ppc.vsx.xvcvdpsp(<2 x double> %[[arg]])
+! CHECK: store <4 x float> %[[call]], ptr %{{.*}}, align 16
+end subroutine vec_cvf_test_r4r8
+
+! CHECK-LABEL: vec_cvf_test_r8r4
+subroutine vec_cvf_test_r8r4(arg1)
+  vector(real(4)), intent(in) :: arg1
+  vector(real(8)) :: r
+  r = vec_cvf(arg1)
+
+! CHECK-FIR: %[[arg:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[carg:.*]] = fir.convert %[[arg]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.xvcvspdp(%[[carg]]) fastmath<contract> : (vector<4xf32>) -> !fir.vector<2:f64>
+! CHECK-FIR: fir.store %[[call]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
+
+! CHECK: %[[arg:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! CHECK: %[[r:.*]] = call contract <2 x double> @llvm.ppc.vsx.xvcvspdp(<4 x float> %[[arg]])
+! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+end subroutine vec_cvf_test_r8r4