Index: lib/Target/ARM/Thumb2SizeReduction.cpp =================================================================== --- lib/Target/ARM/Thumb2SizeReduction.cpp +++ lib/Target/ARM/Thumb2SizeReduction.cpp @@ -335,7 +335,7 @@ bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA || Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD || Opc == ARM::t2LDMDB_UPD); - bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD); + bool isLROk = (Opc == ARM::t2STMDB_UPD); bool isSPOk = isPCOk || isLROk; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); Index: test/CodeGen/ARM/wrong-t2stmia-size-opt.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/wrong-t2stmia-size-opt.ll @@ -0,0 +1,52 @@ +; RUN: llc -mcpu=cortex-a9 -O1 -filetype=obj %s -o - | llvm-objdump -arch thumb -mcpu=cortex-a9 -d - | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7--linux-gnueabi" + +%union.BF_ctx = type { %struct.anon } +%struct.anon = type { [18 x i32], [4 x [256 x i32]] } + +; The test is big because reducing it changes register allocation so that the LR +; register is no longer used as an STMIA operand. It is also somewhat unreliable, because +; changes in register allocation or in other instructions/optimizations can make the +; test pass both with and without the bug.
+define i32 @wrong-t2stmia-size-reduction(%union.BF_ctx* nocapture readonly %ctx, i32 %L, i32 %R, i32* %start, i32* readnone %end) #0 { +entry: + br label %do.body + +do.body: ; preds = %for.end, %entry + %R.addr.0 = phi i32 [ %R, %entry ], [ %xor1, %for.end ] + %start.addr.0 = phi i32* [ %start, %entry ], [ %incdec.ptr6, %for.end ] + %L.addr.0 = phi i32 [ %L, %entry ], [ %xor5, %for.end ] + %0 = getelementptr %union.BF_ctx* %ctx, i32 0, i32 0, i32 0, i32 0 + %1 = load i32* %0, align 4 + %xor = xor i32 %1, %L.addr.0 + %shr = lshr i32 %R.addr.0, 24 + br label %for.body + +for.body: ; preds = %do.body, %for.body + %i.019 = phi i32 [ 0, %do.body ], [ %add, %for.body ] + %L.addr.118 = phi i32 [ %xor, %do.body ], [ %xor1, %for.body ] + %xor1 = xor i32 %L.addr.118, %shr + %add = add nsw i32 %i.019, 1 + %cmp = icmp slt i32 %add, 2 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %2 = getelementptr %union.BF_ctx* %ctx, i32 0, i32 0, i32 0, i32 0 + %3 = load i32* %2, align 4 + %xor5 = xor i32 %3, %shr + %incdec.ptr = getelementptr inbounds i32* %start.addr.0, i32 1 + store i32 %xor5, i32* %start.addr.0, align 4 + %incdec.ptr6 = getelementptr inbounds i32* %start.addr.0, i32 2 + store i32 %xor1, i32* %incdec.ptr, align 4 ; NOTE(review): this store and the one above hit adjacent words; presumably they are what gets merged into the stm checked below - confirm + %cmp7 = icmp ult i32* %incdec.ptr6, %end + br i1 %cmp7, label %do.body, label %do.end + +do.end: ; preds = %for.end + ret i32 %xor5 +} + +; Check that stm writes two registers; the bug caused one of the registers +; (invalid for this instruction) to be dropped. +; CHECK: stm{{[^,]*}}, {{{.*,.*}}}