Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7147,16 +7147,21 @@ /// through token factors and non-volatile loads. In order to remain efficient, /// this only looks a couple of nodes in, it does not do an exhaustive search. bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, - unsigned Depth) const { + unsigned Depth) const { if (*this == Dest) return true; // Don't search too deeply, we just want to be able to see through // TokenFactor's etc. if (Depth == 0) return false; - // If this is a token factor, all inputs to the TF happen in parallel. If any - // of the operands of the TF does not reach dest, then we cannot do the xform. + // If this is a token factor, all inputs to the TF happen in parallel. if (getOpcode() == ISD::TokenFactor) { + // First, try a shallow search: since a TokenFactor is parallel, there are + // no side-effects between a TokenFactor and its operands. + if (llvm::is_contained((*this)->ops(), Dest)) + return true; + // Next, try a deep search: check whether every operand of the TokenFactor + // reaches Dest. for (unsigned i = 0, e = getNumOperands(); i != e; ++i) if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1)) return false; Index: test/CodeGen/ARM/redundant-store.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/redundant-store.ll @@ -0,0 +1,27 @@ +; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s + +; CHECK-LABEL: test1: +; CHECK: test1_entry +; CHECK-NEXT: ldrb r0, [r1] +; CHECK-NEXT: bx lr +define i8 @test1(i8* %a, i8* %b) { +test1_entry: + %aa = load i8, i8* %a + %bb = load i8, i8* %b + store i8 %aa, i8* %a + ret i8 %bb +} + +; CHECK-LABEL: test2: +; CHECK: test2_entry +; CHECK-NEXT: ldrh r1, [r0] +; CHECK-NEXT: orr r1, r1, #384 +; CHECK-NEXT: strh r1, [r0] +; CHECK-NEXT: bx lr +define void @test2(i24* %a) { +test2_entry: + %aa = load i24, i24* %a + %b = or i24 %aa, 384 + store i24 %b, i24* %a + ret void +}