Index: lib/Target/ARM/ARMParallelDSP.cpp =================================================================== --- lib/Target/ARM/ARMParallelDSP.cpp +++ lib/Target/ARM/ARMParallelDSP.cpp @@ -201,6 +201,12 @@ return false; } + // We need a preheader as getIncomingValueForBlock assumes there is one. + if (!TheLoop->getLoopPreheader()) { + LLVM_DEBUG(dbgs() << "No preheader found, bailing out\n"); + return false; + } + Function &F = *Header->getParent(); M = F.getParent(); DL = &M->getDataLayout(); @@ -220,6 +226,12 @@ return false; } + if (!ST->isLittle()) { + LLVM_DEBUG(dbgs() << "Only supporting little endian: not running pass " + "ARMParallelDSP\n"); + return false; + } + LoopAccessInfo LAI(L, SE, TLI, AA, DT, LI); LLVM_DEBUG(dbgs() << "\n== Parallel DSP pass ==\n"); @@ -454,12 +466,6 @@ F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; const BasicBlock *Latch = TheLoop->getLoopLatch(); - // We need a preheader as getIncomingValueForBlock assumes there is one. - if (!TheLoop->getLoopPreheader()) { - LLVM_DEBUG(dbgs() << "No preheader found, bailing out\n"); - return; - } - for (PHINode &Phi : Header->phis()) { const auto *Ty = Phi.getType(); if (!Ty->isIntegerTy(32) && !Ty->isIntegerTy(64)) Index: test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll =================================================================== --- test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll +++ test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll @@ -1,4 +1,7 @@ ; RUN: llc -O3 -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s | FileCheck %s +; RUN: llc -O3 -mtriple=armeb-arm-eabi -mcpu=cortex-m33 < %s | FileCheck %s --check-prefix=CHECK-UNSUPPORTED + +; CHECK-UNSUPPORTED-NOT: smlad ; CHECK-LABEL: add_user ; CHECK: %for.body Index: test/CodeGen/ARM/ParallelDSP/smlad0.ll =================================================================== --- test/CodeGen/ARM/ParallelDSP/smlad0.ll +++ test/CodeGen/ARM/ParallelDSP/smlad0.ll @@ -1,4 +1,5 @@ ; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s 
-arm-parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=armeb-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED ; ; The Cortex-M0 does not support unaligned accesses: ; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m0 < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED Index: test/CodeGen/ARM/ParallelDSP/smladx-1.ll =================================================================== --- test/CodeGen/ARM/ParallelDSP/smladx-1.ll +++ test/CodeGen/ARM/ParallelDSP/smladx-1.ll @@ -1,6 +1,7 @@ ; RUN: opt -mtriple=thumbv8m.main -mcpu=cortex-m33 -arm-parallel-dsp %s -S -o - | FileCheck %s ; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m0 < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED ; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 -mattr=-dsp < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED +; RUN: opt -mtriple=armeb-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED define i32 @smladx(i16* nocapture readonly %pIn1, i16* nocapture readonly %pIn2, i32 %j, i32 %limit) { Index: test/CodeGen/ARM/ParallelDSP/smlald0.ll =================================================================== --- test/CodeGen/ARM/ParallelDSP/smlald0.ll +++ test/CodeGen/ARM/ParallelDSP/smlald0.ll @@ -1,4 +1,5 @@ ; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s +; RUN: opt -mtriple=armeb-arm-eabi -mcpu=cortex-m33 < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED ; ; The Cortex-M0 does not support unaligned accesses: ; RUN: opt -mtriple=arm-arm-eabi -mcpu=cortex-m0 < %s -arm-parallel-dsp -S | FileCheck %s --check-prefix=CHECK-UNSUPPORTED Index: test/CodeGen/ARM/ParallelDSP/unroll-n-jam-smlad.ll =================================================================== --- test/CodeGen/ARM/ParallelDSP/unroll-n-jam-smlad.ll +++ test/CodeGen/ARM/ParallelDSP/unroll-n-jam-smlad.ll @@ 
-1,9 +1,14 @@ ; RUN: llc -O3 -mtriple=thumbv7em %s -o - | FileCheck %s +; RUN: llc -O3 -mtriple=thumbv7eb %s -o - | FileCheck %s --check-prefix=CHECK-UNSUPPORTED ; RUN: llc -O3 -mtriple=thumbv8m.main -mattr=+dsp %s -o - | FileCheck %s +; CHECK-UNSUPPORTED-LABEL: unroll_n_jam_smlad +; CHECK-UNSUPPORTED-NOT: smlad r{{.}} + ; Test that the duplicate loads are removed, which allows parallel dsp to find ; the parallel operations. +; CHECK-LABEL: unroll_n_jam_smlad define void @unroll_n_jam_smlad(i32* %res, i16* %A, i16* %B, i32 %N, i32 %idx) { entry: %xtraiter306.i = and i32 %N, 3