Index: lib/Target/ARM/ARMBaseInstrInfo.cpp =================================================================== --- lib/Target/ARM/ARMBaseInstrInfo.cpp +++ lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2400,7 +2400,8 @@ else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) { // Conservatively refuse to convert an instruction which isn't in the same // BB as the comparison. - // For CMPri, we need to check Sub, thus we can't return here. + // For CMPri w/ CmpValue != 0, a Sub may still be a candidate. + // Thus we cannot return here. if (CmpInstr->getOpcode() == ARM::CMPri || CmpInstr->getOpcode() == ARM::t2CMPri) MI = nullptr; @@ -2479,8 +2480,8 @@ case ARM::t2EORrr: case ARM::t2EORri: { // Scan forward for the use of CPSR - // When checking against MI: if it's a conditional code requires - // checking of V bit, then this is not safe to do. + // When checking against MI: if it's a conditional code that requires + // checking of the V bit or C bit, then this is not safe to do. // It is safe to remove CmpInstr if CPSR is redefined or killed. // If we are done with the basic block, we need to check whether CPSR is // live-out. @@ -2547,19 +2548,30 @@ OperandsToUpdate.push_back( std::make_pair(&((*I).getOperand(IO - 1)), NewCC)); } - } else + } else { + // No Sub, so this is x = y, z; cmp x, 0. switch (CC) { - default: + case ARMCC::EQ: // Z + case ARMCC::NE: // Z + case ARMCC::MI: // N + case ARMCC::PL: // N + case ARMCC::AL: // none // CPSR can be used multiple times, we should continue. break; - case ARMCC::VS: - case ARMCC::VC: - case ARMCC::GE: - case ARMCC::LT: - case ARMCC::GT: - case ARMCC::LE: + case ARMCC::HS: // C + case ARMCC::LO: // C + case ARMCC::VS: // V + case ARMCC::VC: // V + case ARMCC::HI: // C Z + case ARMCC::LS: // C Z + case ARMCC::GE: // N V + case ARMCC::LT: // N V + case ARMCC::GT: // Z N V + case ARMCC::LE: // Z N V + // The instruction uses the V bit or C bit which is not safe. return false; } + } } } Index: test/CodeGen/ARM/sub-cmp-peephole.ll =================================================================== --- test/CodeGen/ARM/sub-cmp-peephole.ll +++ test/CodeGen/ARM/sub-cmp-peephole.ll @@ -88,6 +88,19 @@ ret i32 23 } +; When considering the producer of cmp's src as the subsuming instruction, +; only consider that when the comparison is to 0. +define i32 @cmp_src_nonzero(i32 %a, i32 %b, i32 %x, i32 %y) { +entry: +; CHECK-LABEL: cmp_src_nonzero: +; CHECK: sub +; CHECK: cmp + %sub = sub i32 %a, %b + %cmp = icmp eq i32 %sub, 17 + %ret = select i1 %cmp, i32 %x, i32 %y + ret i32 %ret +} + define float @float_sel(i32 %a, i32 %b, float %x, float %y) { entry: ; CHECK-LABEL: float_sel: @@ -144,3 +157,50 @@ store i32 %sub, i32* @t ret double %ret } + +declare void @abort() +declare void @exit(i32) + +; If the comparison uses the V bit (signed overflow/underflow), we can't +; omit the comparison. +define i32 @cmp_slt0(i32 %a, i32 %b, i32 %x, i32 %y) { +entry: +; CHECK-LABEL: cmp_slt0 +; CHECK: sub +; CHECK: cmp +; CHECK: bge + %load = load i32* @t, align 4 + %sub = sub i32 %load, 17 + %cmp = icmp slt i32 %sub, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + call void @abort() + unreachable + +if.else: + call void @exit(i32 0) + unreachable +} + +; Same for the C bit. (Note the ult X, 0 is trivially +; false, so the DAG combiner may or may not optimize it). +define i32 @cmp_ult0(i32 %a, i32 %b, i32 %x, i32 %y) { +entry: +; CHECK-LABEL: cmp_ult0 +; CHECK: sub +; CHECK: cmp +; CHECK: bhs + %load = load i32* @t, align 4 + %sub = sub i32 %load, 17 + %cmp = icmp ult i32 %sub, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + call void @abort() + unreachable + +if.else: + call void @exit(i32 0) + unreachable +}