diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -74,6 +74,7 @@
 #include "llvm/Transforms/ObjCARC.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
 #include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
@@ -1372,6 +1373,13 @@
     }
 
     if (CodeGenOpts.OptimizationLevel == 0) {
+      // FIXME: the backends do not handle matrix intrinsics currently. Make
+      // sure they are also lowered in O0. A lightweight version of the pass
+      // should run in the backend pipeline on demand.
+      if (LangOpts.MatrixTypes)
+        MPM.addPass(
+            createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass()));
+
       addCoroutinePassesAtO0(MPM, LangOpts, CodeGenOpts);
       addSanitizersAtO0(MPM, TargetTriple, LangOpts, CodeGenOpts);
     }
diff --git a/clang/test/CodeGen/matrix-lowering-opt-levels.c b/clang/test/CodeGen/matrix-lowering-opt-levels.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/matrix-lowering-opt-levels.c
@@ -0,0 +1,28 @@
+// RUN: %clang -O0 -fenable-matrix -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang -O1 -fenable-matrix -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang -O2 -fenable-matrix -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang -O3 -fenable-matrix -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang -Ofast -fenable-matrix -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang -Os -fenable-matrix -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang -Oz -fenable-matrix -S -emit-llvm %s -o - | FileCheck %s
+
+// RUN: %clang -O0 -fenable-matrix -fexperimental-new-pass-manager -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang -O1 -fenable-matrix -fexperimental-new-pass-manager -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang -O2 -fenable-matrix -fexperimental-new-pass-manager -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang -O3 -fenable-matrix -fexperimental-new-pass-manager -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang -Ofast -fenable-matrix -fexperimental-new-pass-manager -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang -Os -fenable-matrix -fexperimental-new-pass-manager -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang -Oz -fenable-matrix -fexperimental-new-pass-manager -S -emit-llvm %s -o - | FileCheck %s
+
+// Smoke test that the matrix intrinsics are lowered at any optimisation level.
+
+typedef float m4x4_t __attribute__((matrix_type(4, 4)));
+
+m4x4_t f(m4x4_t a, m4x4_t b, m4x4_t c) {
+  //
+  // CHECK-LABEL: f(
+  // CHECK-NOT: @llvm.matrix
+  // CHECK: }
+  //
+  return a + b * c;
+}
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -275,6 +275,8 @@
 extern cl::opt<AttributorRunOption> AttributorRun;
 extern cl::opt<bool> EnableKnowledgeRetention;
 
+extern cl::opt<bool> EnableMatrix;
+
 const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = {
     /*SpeedLevel*/ 0,
     /*SizeLevel*/ 0};
@@ -1093,6 +1095,11 @@
   OptimizePM.addPass(Float2IntPass());
   OptimizePM.addPass(LowerConstantIntrinsicsPass());
 
+  if (EnableMatrix) {
+    OptimizePM.addPass(LowerMatrixIntrinsicsPass());
+    OptimizePM.addPass(EarlyCSEPass());
+  }
+
   // FIXME: We need to run some loop optimizations to re-rotate loops after
   // simplify-cfg and others undo their rotation.
 
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -149,9 +149,9 @@
     "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
     cl::desc("Enable order file instrumentation (default = off)"));
 
-static cl::opt<bool>
-    EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
-                 cl::desc("Enable lowering of the matrix intrinsics"));
+cl::opt<bool> EnableMatrix(
+    "enable-matrix", cl::init(false), cl::Hidden,
+    cl::desc("Enable lowering of the matrix intrinsics"));
 
 cl::opt<AttributorRunOption> AttributorRun(
     "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),