Index: llvm/lib/Target/ARM/ARMParallelDSP.cpp =================================================================== --- llvm/lib/Target/ARM/ARMParallelDSP.cpp +++ llvm/lib/Target/ARM/ARMParallelDSP.cpp @@ -459,6 +459,10 @@ if (ValidLHS && ValidRHS) return true; + // Ensure we don't add the root as the incoming accumulator. + if (R.getRoot() == I) + return false; + return R.InsertAcc(I); } case Instruction::Mul: { @@ -535,6 +539,7 @@ InsertParallelMACs(R); Changed = true; AllAdds.insert(R.getAdds().begin(), R.getAdds().end()); + LLVM_DEBUG(dbgs() << "BB after inserting parallel MACs:\n" << BB); } } Index: llvm/test/CodeGen/ARM/ParallelDSP/self-ref-bug.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/ParallelDSP/self-ref-bug.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=thumbv7-unknown-linux-gnueabihf < %s -arm-parallel-dsp -verify -dce -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + +define i32 @test(ptr %b) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 2 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 3 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX1]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP1]], i32 [[TMP0]], i32 0) +; CHECK-NEXT: [[MUL13:%.*]] = mul nsw i32 0, 0 +; CHECK-NEXT: [[ADD15:%.*]] = add nsw i32 [[MUL13]], [[TMP2]] +; CHECK-NEXT: ret i32 [[ADD15]] +; +entry: + %0 = load i16, ptr %b, align 2 + %conv = sext i16 %0 to i32 + %arrayidx1 = getelementptr inbounds i16, ptr %b, i32 3 + %1 = load i16, ptr %arrayidx1, align 2 + %conv2 = sext i16 %1 to i32 + %mul = mul nsw i32 %conv2, %conv + %incdec.ptr = getelementptr inbounds i16, ptr %b, i32 1 + %2 = load i16, ptr %incdec.ptr, align 2 + %conv4 = sext i16 %2 to i32 + %arrayidx5 = getelementptr inbounds i16, ptr %b, i32 4 + %3 = load i16, ptr %arrayidx5, align 2 + %conv6 = sext i16 %3 to i32 + %mul7 = mul nsw i32 %conv6, %conv4 + %add9 = add nsw i32 %mul7, %mul + %mul13 = mul nsw i32 0, 0 + %add15 = add nsw i32 %mul13, %add9 + ret i32 %add15 +}