Index: llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp +++ llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp @@ -14,6 +14,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopAccessAnalysis.h" @@ -36,7 +37,9 @@ using namespace llvm; using namespace PatternMatch; -#define DEBUG_TYPE "parallel-dsp" +#define DEBUG_TYPE "arm-parallel-dsp" + +STATISTIC(NumSMLAD , "Number of smlad instructions generated"); namespace { struct ParallelMAC; @@ -604,6 +607,7 @@ Value* Args[] = { VecLd0, VecLd1, Acc }; Function *SMLAD = Intrinsic::getDeclaration(M, Intrinsic::arm_smlad); CallInst *Call = Builder.CreateCall(SMLAD, Args); + NumSMLAD++; return Call; } @@ -613,7 +617,7 @@ char ARMParallelDSP::ID = 0; -INITIALIZE_PASS_BEGIN(ARMParallelDSP, "parallel-dsp", +INITIALIZE_PASS_BEGIN(ARMParallelDSP, "arm-parallel-dsp", "Transform loops to use DSP intrinsics", false, false) -INITIALIZE_PASS_END(ARMParallelDSP, "parallel-dsp", +INITIALIZE_PASS_END(ARMParallelDSP, "arm-parallel-dsp", "Transform loops to use DSP intrinsics", false, false) Index: llvm/trunk/test/CodeGen/ARM/smlad0.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/smlad0.ll +++ llvm/trunk/test/CodeGen/ARM/smlad0.ll @@ -1,10 +1,10 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; The Cortex-M0 does not support unaligned accesses: -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m0 < %s -parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m0 < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED ; ; Check DSP extension: -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 -mattr=-dsp < %s -parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 -mattr=-dsp < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED ; ; CHECK: %mac1{{\.}}026 = phi i32 [ [[V8:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ] ; CHECK: [[V4:%[0-9]+]] = bitcast i16* %arrayidx3 to i32* Index: llvm/trunk/test/CodeGen/ARM/smlad1.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/smlad1.ll +++ llvm/trunk/test/CodeGen/ARM/smlad1.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; CHECK-LABEL: @test1 ; CHECK: %mac1{{\.}}026 = phi i32 [ [[V8:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ] Index: llvm/trunk/test/CodeGen/ARM/smlad10.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/smlad10.ll +++ llvm/trunk/test/CodeGen/ARM/smlad10.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; Reduction statement is an i64 type: we only support i32 so check that the ; rewrite isn't triggered. Index: llvm/trunk/test/CodeGen/ARM/smlad11.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/smlad11.ll +++ llvm/trunk/test/CodeGen/ARM/smlad11.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; REQUIRES: asserts +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S -stats 2>&1 | FileCheck %s ; ; A more complicated chain: 4 mul operations, so we expect 2 smlad calls. ; @@ -15,9 +16,10 @@ ; CHECK: [[V17:%[0-9]+]] = call i32 @llvm.arm.smlad(i32 [[V14]], i32 [[V16]], i32 [[V12]]) ; ; And we don't want to see a 3rd smlad: -; ; CHECK-NOT: call i32 @llvm.arm.smlad ; +; CHECK: 2 arm-parallel-dsp - Number of smlad instructions generated +; define dso_local i32 @test(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { entry: %cmp52 = icmp sgt i32 %arg, 0 Index: llvm/trunk/test/CodeGen/ARM/smlad12.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/smlad12.ll +++ llvm/trunk/test/CodeGen/ARM/smlad12.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; The loop header is not the loop latch. ; Index: llvm/trunk/test/CodeGen/ARM/smlad2.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/smlad2.ll +++ llvm/trunk/test/CodeGen/ARM/smlad2.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; Operands of both muls are not symmetrical (see also comments inlined below), check ; that the rewrite isn't triggered. @@ -49,4 +49,3 @@ %exitcond = icmp ne i32 %add, %arg br i1 %exitcond, label %for.body, label %for.cond.cleanup } - Index: llvm/trunk/test/CodeGen/ARM/smlad3.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/smlad3.ll +++ llvm/trunk/test/CodeGen/ARM/smlad3.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; The loads are not consecutive: check that the rewrite isn't triggered. ; Index: llvm/trunk/test/CodeGen/ARM/smlad4.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/smlad4.ll +++ llvm/trunk/test/CodeGen/ARM/smlad4.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; The loads are not narrow loads: check that the rewrite isn't triggered. ; Index: llvm/trunk/test/CodeGen/ARM/smlad5.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/smlad5.ll +++ llvm/trunk/test/CodeGen/ARM/smlad5.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; The loads are volatile loads: check that the rewrite isn't triggered. ; Index: llvm/trunk/test/CodeGen/ARM/smlad6.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/smlad6.ll +++ llvm/trunk/test/CodeGen/ARM/smlad6.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; Alias check: check that the rewrite isn't triggered when there's a store ; instruction possibly aliasing any mul load operands; arguments are passed Index: llvm/trunk/test/CodeGen/ARM/smlad7.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/smlad7.ll +++ llvm/trunk/test/CodeGen/ARM/smlad7.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; Alias check: check that the rewrite isn't triggered when there's a store ; aliasing one of the mul load operands. Arguments are now annotated with Index: llvm/trunk/test/CodeGen/ARM/smlad8.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/smlad8.ll +++ llvm/trunk/test/CodeGen/ARM/smlad8.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; Mul with operands that are not simple load and sext/zext chains: this is not ; yet supported so the rewrite shouldn't trigger (but we do want to support this Index: llvm/trunk/test/CodeGen/ARM/smlad9.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/smlad9.ll +++ llvm/trunk/test/CodeGen/ARM/smlad9.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; Muls with operands that are constants: not yet supported, so the rewrite ; should not trigger (but we do want to add this soon).