Index: lib/Target/ARM/ARMParallelDSP.cpp
===================================================================
--- lib/Target/ARM/ARMParallelDSP.cpp
+++ lib/Target/ARM/ARMParallelDSP.cpp
@@ -14,6 +14,7 @@
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
@@ -36,7 +37,9 @@
 using namespace llvm;
 using namespace PatternMatch;
 
-#define DEBUG_TYPE "parallel-dsp"
+#define DEBUG_TYPE "arm-parallel-dsp"
+
+STATISTIC(NumSMLAD, "Number of smlad instructions generated");
 
 namespace {
   struct ParallelMAC;
@@ -604,6 +607,7 @@
   Value* Args[] = { VecLd0, VecLd1, Acc };
   Function *SMLAD = Intrinsic::getDeclaration(M, Intrinsic::arm_smlad);
   CallInst *Call = Builder.CreateCall(SMLAD, Args);
+  ++NumSMLAD;
   return Call;
 }
 
@@ -613,7 +617,7 @@
 
 char ARMParallelDSP::ID = 0;
 
-INITIALIZE_PASS_BEGIN(ARMParallelDSP, "parallel-dsp",
+INITIALIZE_PASS_BEGIN(ARMParallelDSP, "arm-parallel-dsp",
                 "Transform loops to use DSP intrinsics", false, false)
-INITIALIZE_PASS_END(ARMParallelDSP, "parallel-dsp",
+INITIALIZE_PASS_END(ARMParallelDSP, "arm-parallel-dsp",
                 "Transform loops to use DSP intrinsics", false, false)
Index: test/CodeGen/ARM/smlad0.ll
===================================================================
--- test/CodeGen/ARM/smlad0.ll
+++ test/CodeGen/ARM/smlad0.ll
@@ -1,10 +1,11 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; REQUIRES: asserts
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S -stats 2>&1 | FileCheck %s
 ;
 ; The Cortex-M0 does not support unaligned accesses:
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m0 < %s -parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m0 < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
 ;
 ; Check DSP extension:
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 -mattr=-dsp < %s -parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 -mattr=-dsp < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
 ;
 ; CHECK:  %mac1{{\.}}026 = phi i32 [ [[V8:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ]
 ; CHECK:  [[V4:%[0-9]+]] = bitcast i16* %arrayidx3 to i32*
@@ -13,6 +14,8 @@
 ; CHECK:  [[V7:%[0-9]+]] = load i32, i32* [[V6]], align 2
 ; CHECK:  [[V8]] = call i32 @llvm.arm.smlad(i32 [[V5]], i32 [[V7]], i32 %mac1{{\.}}026)
 ;
+; CHECK:  1 arm-parallel-dsp - Number of smlad instructions generated
+;
 ; CHECK-UNSUPPORTED-NOT:  call i32 @llvm.arm.smlad
 ;
 define dso_local i32 @test(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) {
Index: test/CodeGen/ARM/smlad1.ll
===================================================================
--- test/CodeGen/ARM/smlad1.ll
+++ test/CodeGen/ARM/smlad1.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 
 ; CHECK-LABEL: @test1
 ; CHECK:  %mac1{{\.}}026 = phi i32 [ [[V8:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ]
Index: test/CodeGen/ARM/smlad10.ll
===================================================================
--- test/CodeGen/ARM/smlad10.ll
+++ test/CodeGen/ARM/smlad10.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; Reduction statement is an i64 type: we only support i32 so check that the
 ; rewrite isn't triggered.
Index: test/CodeGen/ARM/smlad11.ll
===================================================================
--- test/CodeGen/ARM/smlad11.ll
+++ test/CodeGen/ARM/smlad11.ll
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; REQUIRES: asserts
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S -stats 2>&1 | FileCheck %s
 ;
 ; A more complicated chain: 4 mul operations, so we expect 2 smlad calls.
 ;
@@ -18,6 +19,8 @@
 ;
 ; CHECK-NOT: call i32 @llvm.arm.smlad
 ;
+; CHECK:  2 arm-parallel-dsp - Number of smlad instructions generated
+;
 define dso_local i32 @test(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) {
 entry:
   %cmp52 = icmp sgt i32 %arg, 0
Index: test/CodeGen/ARM/smlad12.ll
===================================================================
--- test/CodeGen/ARM/smlad12.ll
+++ test/CodeGen/ARM/smlad12.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; The loop header is not the loop latch.
 ;
Index: test/CodeGen/ARM/smlad2.ll
===================================================================
--- test/CodeGen/ARM/smlad2.ll
+++ test/CodeGen/ARM/smlad2.ll
@@ -1,10 +1,12 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S -stats 2>&1 | FileCheck %s
 ;
 ; Operands of both muls are not symmetrical (see also comments inlined below), check
 ; that the rewrite isn't triggered.
 ;
 ; CHECK-NOT:  call i32 @llvm.arm.smlad
 ;
+; CHECK-NOT:  arm-parallel-dsp - Number of smlad instructions generated
+;
 define dso_local i32 @test(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) {
 entry:
   %cmp24 = icmp sgt i32 %arg, 0
Index: test/CodeGen/ARM/smlad3.ll
===================================================================
--- test/CodeGen/ARM/smlad3.ll
+++ test/CodeGen/ARM/smlad3.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; The loads are not consecutive: check that the rewrite isn't triggered.
 ;
Index: test/CodeGen/ARM/smlad4.ll
===================================================================
--- test/CodeGen/ARM/smlad4.ll
+++ test/CodeGen/ARM/smlad4.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; The loads are not narrow loads: check that the rewrite isn't triggered.
 ;
Index: test/CodeGen/ARM/smlad5.ll
===================================================================
--- test/CodeGen/ARM/smlad5.ll
+++ test/CodeGen/ARM/smlad5.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; The loads are volatile loads: check that the rewrite isn't triggered.
 ;
Index: test/CodeGen/ARM/smlad6.ll
===================================================================
--- test/CodeGen/ARM/smlad6.ll
+++ test/CodeGen/ARM/smlad6.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; Alias check: check that the rewrite isn't triggered when there's a store
 ; instruction possibly aliasing any mul load operands; arguments are passed
Index: test/CodeGen/ARM/smlad7.ll
===================================================================
--- test/CodeGen/ARM/smlad7.ll
+++ test/CodeGen/ARM/smlad7.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; Alias check: check that the rewrite isn't triggered when there's a store
 ; aliasing one of the mul load operands. Arguments are now annotated with
Index: test/CodeGen/ARM/smlad8.ll
===================================================================
--- test/CodeGen/ARM/smlad8.ll
+++ test/CodeGen/ARM/smlad8.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; Mul with operands that are not simple load and sext/zext chains: this is not
 ; yet supported so the rewrite shouldn't trigger (but we do want to support this
Index: test/CodeGen/ARM/smlad9.ll
===================================================================
--- test/CodeGen/ARM/smlad9.ll
+++ test/CodeGen/ARM/smlad9.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; Muls with operands that are constants: not yet supported, so the rewrite
 ; should not trigger (but we do want to add this soon).