Index: lib/Target/ARM/ARMParallelDSP.cpp =================================================================== --- lib/Target/ARM/ARMParallelDSP.cpp +++ lib/Target/ARM/ARMParallelDSP.cpp @@ -14,6 +14,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopAccessAnalysis.h" @@ -36,7 +37,9 @@ using namespace llvm; using namespace PatternMatch; -#define DEBUG_TYPE "parallel-dsp" +#define DEBUG_TYPE "arm-parallel-dsp" + +STATISTIC(NumSMLAD , "Number of smlad instructions generated"); namespace { struct ParallelMAC; @@ -604,6 +607,7 @@ Value* Args[] = { VecLd0, VecLd1, Acc }; Function *SMLAD = Intrinsic::getDeclaration(M, Intrinsic::arm_smlad); CallInst *Call = Builder.CreateCall(SMLAD, Args); + NumSMLAD++; return Call; } @@ -613,7 +617,7 @@ char ARMParallelDSP::ID = 0; -INITIALIZE_PASS_BEGIN(ARMParallelDSP, "parallel-dsp", +INITIALIZE_PASS_BEGIN(ARMParallelDSP, "arm-parallel-dsp", "Transform loops to use DSP intrinsics", false, false) -INITIALIZE_PASS_END(ARMParallelDSP, "parallel-dsp", +INITIALIZE_PASS_END(ARMParallelDSP, "arm-parallel-dsp", "Transform loops to use DSP intrinsics", false, false) Index: test/CodeGen/ARM/smlad0.ll =================================================================== --- test/CodeGen/ARM/smlad0.ll +++ test/CodeGen/ARM/smlad0.ll @@ -1,10 +1,10 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S -stats 2>&1 | FileCheck %s ; ; The Cortex-M0 does not support unaligned accesses: -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m0 < %s -parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m0 < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED ; ; Check DSP extension: -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 -mattr=-dsp < %s -parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 -mattr=-dsp < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED ; ; CHECK: %mac1{{\.}}026 = phi i32 [ [[V8:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ] ; CHECK: [[V4:%[0-9]+]] = bitcast i16* %arrayidx3 to i32* @@ -13,6 +13,8 @@ ; CHECK: [[V7:%[0-9]+]] = load i32, i32* [[V6]], align 2 ; CHECK: [[V8]] = call i32 @llvm.arm.smlad(i32 [[V5]], i32 [[V7]], i32 %mac1{{\.}}026) ; +; CHECK: 1 arm-parallel-dsp - Number of smlad instructions generated +; ; CHECK-UNSUPPORTED-NOT: call i32 @llvm.arm.smlad ; define dso_local i32 @test(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { Index: test/CodeGen/ARM/smlad1.ll =================================================================== --- test/CodeGen/ARM/smlad1.ll +++ test/CodeGen/ARM/smlad1.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; CHECK-LABEL: @test1 ; CHECK: %mac1{{\.}}026 = phi i32 [ [[V8:%[0-9]+]], %for.body ], [ 0, %for.body.preheader ] Index: test/CodeGen/ARM/smlad10.ll =================================================================== --- test/CodeGen/ARM/smlad10.ll +++ test/CodeGen/ARM/smlad10.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; Reduction statement is an i64 type: we only support i32 so check that the ; rewrite isn't triggered. Index: test/CodeGen/ARM/smlad11.ll =================================================================== --- test/CodeGen/ARM/smlad11.ll +++ test/CodeGen/ARM/smlad11.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S -stats 2>&1 | FileCheck %s ; ; A more complicated chain: 4 mul operations, so we expect 2 smlad calls. ; @@ -18,6 +18,8 @@ ; ; CHECK-NOT: call i32 @llvm.arm.smlad ; +; CHECK: 2 arm-parallel-dsp - Number of smlad instructions generated +; define dso_local i32 @test(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { entry: %cmp52 = icmp sgt i32 %arg, 0 Index: test/CodeGen/ARM/smlad12.ll =================================================================== --- test/CodeGen/ARM/smlad12.ll +++ test/CodeGen/ARM/smlad12.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; The loop header is not the loop latch. ; Index: test/CodeGen/ARM/smlad2.ll =================================================================== --- test/CodeGen/ARM/smlad2.ll +++ test/CodeGen/ARM/smlad2.ll @@ -1,10 +1,12 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S -stats 2>&1 | FileCheck %s ; ; Operands of both muls are not symmetrical (see also comments inlined below), check ; that the rewrite isn't triggered. ; ; CHECK-NOT: call i32 @llvm.arm.smlad ; +; CHECK-NOT: arm-parallel-dsp - Number of smlad instructions generated +; define dso_local i32 @test(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { entry: %cmp24 = icmp sgt i32 %arg, 0 Index: test/CodeGen/ARM/smlad3.ll =================================================================== --- test/CodeGen/ARM/smlad3.ll +++ test/CodeGen/ARM/smlad3.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; The loads are not consecutive: check that the rewrite isn't triggered. ; Index: test/CodeGen/ARM/smlad4.ll =================================================================== --- test/CodeGen/ARM/smlad4.ll +++ test/CodeGen/ARM/smlad4.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; The loads are not narrow loads: check that the rewrite isn't triggered. ; Index: test/CodeGen/ARM/smlad5.ll =================================================================== --- test/CodeGen/ARM/smlad5.ll +++ test/CodeGen/ARM/smlad5.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; The loads are volatile loads: check that the rewrite isn't triggered. ; Index: test/CodeGen/ARM/smlad6.ll =================================================================== --- test/CodeGen/ARM/smlad6.ll +++ test/CodeGen/ARM/smlad6.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; Alias check: check that the rewrite isn't triggered when there's a store ; instruction possibly aliasing any mul load operands; arguments are passed Index: test/CodeGen/ARM/smlad7.ll =================================================================== --- test/CodeGen/ARM/smlad7.ll +++ test/CodeGen/ARM/smlad7.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; Alias check: check that the rewrite isn't triggered when there's a store ; aliasing one of the mul load operands. Arguments are now annotated with Index: test/CodeGen/ARM/smlad8.ll =================================================================== --- test/CodeGen/ARM/smlad8.ll +++ test/CodeGen/ARM/smlad8.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; Mul with operands that are not simple load and sext/zext chains: this is not ; yet supported so the rewrite shouldn't trigger (but we do want to support this Index: test/CodeGen/ARM/smlad9.ll =================================================================== --- test/CodeGen/ARM/smlad9.ll +++ test/CodeGen/ARM/smlad9.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s ; ; Muls with operands that are constants: not yet supported, so the rewrite ; should not trigger (but we do want to add this soon).