Index: clang/test/CodeGen/matrix-lowering.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/matrix-lowering.c
@@ -0,0 +1,23 @@
+// RUN: %clang -O0 -fenable-matrix -target aarch64-linux-eabi %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang -O1 -fenable-matrix -target aarch64-linux-eabi %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang -O2 -fenable-matrix -target aarch64-linux-eabi %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang -O3 -fenable-matrix -target aarch64-linux-eabi %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang -Ofast -fenable-matrix -target aarch64-linux-eabi %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang -Os -fenable-matrix -target aarch64-linux-eabi %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang -Oz -fenable-matrix -target aarch64-linux-eabi %s -S -emit-llvm -o - | FileCheck %s
+
+// Matrix intrinsics must be lowered at every optimization level, including
+// -O0, so no call to the matrix multiply intrinsic may remain in the output.
+// CHECK-NOT: @llvm.matrix.multiply
+
+typedef float m4x4_t __attribute__((matrix_type(4, 4)));
+
+m4x4_t f(m4x4_t a, m4x4_t b, m4x4_t c) {
+//
+// CHECK-LABEL: f(
+// CHECK-NOT: @llvm.matrix.multiply
+// CHECK: }
+//
+  return a + b * c;
+}
+
Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -531,6 +531,9 @@
       // new unnamed globals.
       MPM.add(createNameAnonGlobalPass());
     }
+    // Matrix intrinsics also need to be lowered at -O0, but don't run CSE as a
+    // clean-up after it, which we do with OptLevel > 0.
+    MPM.add(createLowerMatrixIntrinsicsPass());
     return;
   }
 