Index: llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp
+++ llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp
@@ -14,6 +14,7 @@
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
@@ -36,7 +37,9 @@
 using namespace llvm;
 using namespace PatternMatch;
 
-#define DEBUG_TYPE "parallel-dsp"
+#define DEBUG_TYPE "arm-parallel-dsp"
+
+STATISTIC(NumSMLAD, "Number of smlad instructions generated");
 
 namespace {
   struct ParallelMAC;
@@ -604,6 +607,7 @@
   Value* Args[] = { VecLd0, VecLd1, Acc };
   Function *SMLAD = Intrinsic::getDeclaration(M, Intrinsic::arm_smlad);
   CallInst *Call = Builder.CreateCall(SMLAD, Args);
+  NumSMLAD++;
   return Call;
 }
 
@@ -613,7 +617,7 @@
 
 char ARMParallelDSP::ID = 0;
 
-INITIALIZE_PASS_BEGIN(ARMParallelDSP, "parallel-dsp",
+INITIALIZE_PASS_BEGIN(ARMParallelDSP, "arm-parallel-dsp",
                 "Transform loops to use DSP intrinsics", false, false)
-INITIALIZE_PASS_END(ARMParallelDSP, "parallel-dsp",
+INITIALIZE_PASS_END(ARMParallelDSP, "arm-parallel-dsp",
                 "Transform loops to use DSP intrinsics", false, false)
Index: llvm/trunk/test/CodeGen/ARM/smlad0.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/smlad0.ll
+++ llvm/trunk/test/CodeGen/ARM/smlad0.ll
@@ -1,10 +1,10 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; The Cortex-M0 does not support unaligned accesses:
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m0 < %s -parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m0 < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
 ;
 ; Check DSP extension:
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 -mattr=-dsp < %s -parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 -mattr=-dsp < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
 ;
 ; CHECK:  %mac1{{\.}}026 = phi i32 [ [[V8:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ]
 ; CHECK:  [[V4:%[0-9]+]] = bitcast i16* %arrayidx3 to i32*
Index: llvm/trunk/test/CodeGen/ARM/smlad1.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/smlad1.ll
+++ llvm/trunk/test/CodeGen/ARM/smlad1.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 
 ; CHECK-LABEL: @test1
 ; CHECK:  %mac1{{\.}}026 = phi i32 [ [[V8:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ]
Index: llvm/trunk/test/CodeGen/ARM/smlad10.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/smlad10.ll
+++ llvm/trunk/test/CodeGen/ARM/smlad10.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; Reduction statement is an i64 type: we only support i32 so check that the
 ; rewrite isn't triggered.
Index: llvm/trunk/test/CodeGen/ARM/smlad11.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/smlad11.ll
+++ llvm/trunk/test/CodeGen/ARM/smlad11.ll
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; REQUIRES: asserts
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S -stats 2>&1 | FileCheck %s
 ;
 ; A more complicated chain: 4 mul operations, so we expect 2 smlad calls.
 ;
@@ -15,9 +16,10 @@
 ; CHECK:  [[V17:%[0-9]+]] = call i32 @llvm.arm.smlad(i32 [[V14]], i32 [[V16]], i32 [[V12]])
 ;
 ; And we don't want to see a 3rd smlad:
-;
 ; CHECK-NOT: call i32 @llvm.arm.smlad
 ;
+; CHECK:  2 arm-parallel-dsp - Number of smlad instructions generated
+;
 define dso_local i32 @test(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) {
 entry:
   %cmp52 = icmp sgt i32 %arg, 0
Index: llvm/trunk/test/CodeGen/ARM/smlad12.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/smlad12.ll
+++ llvm/trunk/test/CodeGen/ARM/smlad12.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; The loop header is not the loop latch.
 ;
Index: llvm/trunk/test/CodeGen/ARM/smlad2.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/smlad2.ll
+++ llvm/trunk/test/CodeGen/ARM/smlad2.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; Operands of both muls are not symmetrical (see also comments inlined below), check
 ; that the rewrite isn't triggered.
@@ -49,4 +49,3 @@
   %exitcond = icmp ne i32 %add, %arg
   br i1 %exitcond, label %for.body, label %for.cond.cleanup
 }
-
Index: llvm/trunk/test/CodeGen/ARM/smlad3.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/smlad3.ll
+++ llvm/trunk/test/CodeGen/ARM/smlad3.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; The loads are not consecutive: check that the rewrite isn't triggered.
 ;
Index: llvm/trunk/test/CodeGen/ARM/smlad4.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/smlad4.ll
+++ llvm/trunk/test/CodeGen/ARM/smlad4.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; The loads are not narrow loads: check that the rewrite isn't triggered.
 ;
Index: llvm/trunk/test/CodeGen/ARM/smlad5.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/smlad5.ll
+++ llvm/trunk/test/CodeGen/ARM/smlad5.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; The loads are volatile loads: check that the rewrite isn't triggered.
 ;
Index: llvm/trunk/test/CodeGen/ARM/smlad6.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/smlad6.ll
+++ llvm/trunk/test/CodeGen/ARM/smlad6.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; Alias check: check that the rewrite isn't triggered when there's a store
 ; instruction possibly aliasing any mul load operands; arguments are passed
Index: llvm/trunk/test/CodeGen/ARM/smlad7.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/smlad7.ll
+++ llvm/trunk/test/CodeGen/ARM/smlad7.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; Alias check: check that the rewrite isn't triggered when there's a store
 ; aliasing one of the mul load operands. Arguments are now annotated with
Index: llvm/trunk/test/CodeGen/ARM/smlad8.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/smlad8.ll
+++ llvm/trunk/test/CodeGen/ARM/smlad8.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; Mul with operands that are not simple load and sext/zext chains: this is not
 ; yet supported so the rewrite shouldn't trigger (but we do want to support this
Index: llvm/trunk/test/CodeGen/ARM/smlad9.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/smlad9.ll
+++ llvm/trunk/test/CodeGen/ARM/smlad9.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s
+; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s
 ;
 ; Muls with operands that are constants: not yet supported, so the rewrite
 ; should not trigger (but we do want to add this soon).