Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -9886,6 +9886,40 @@ return SDValue(); } +static SDValue PerformADDCCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + EVT VT = LHS.getValueType(); + SDLoc DL(N); + + // If the flag result is dead, turn this into an ADD. + if (!N->hasAnyUseOfValue(1)) + return DCI.CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, LHS, RHS), + DAG.getConstant(0, DL, MVT::i32)); + + // Canonicalize constant to RHS. + ConstantSDNode *N0C = dyn_cast(LHS); + ConstantSDNode *N1C = dyn_cast(RHS); + if (N0C && !N1C) + return DAG.getNode(ARMISD::ADDC, DL, N->getVTList(), RHS, LHS); + + // fold (addc x, 0) -> x + no carry out + if (isNullConstant(RHS)) + return DCI.CombineTo(N, LHS, DAG.getConstant(0, DL, MVT::i32)); + + // If it cannot overflow, transform into an add. + if (DAG.computeOverflowKind(LHS, RHS) == SelectionDAG::OFK_Never) + return DCI.CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, LHS, RHS), + DAG.getConstant(0, DL, MVT::i32)); + + // Handle negative addend. + return PerformAddcSubcCombine(N, DAG, Subtarget); +} + static SDValue PerformAddeSubeCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { if (Subtarget->isThumb1Only()) { @@ -9916,6 +9950,37 @@ static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + + // canonicalize constant to RHS + ConstantSDNode *N0C = dyn_cast(LHS); + ConstantSDNode *N1C = dyn_cast(RHS); + if (N0C && !N1C) + return DAG.getNode(ARMISD::ADDE, SDLoc(N), N->getVTList(), + RHS, LHS, Carry); + + // fold (adde x, y, false) -> (addc x, y) + if (isNullConstant(Carry)) + return DAG.getNode(ARMISD::ADDC, SDLoc(N), N->getVTList(), LHS, RHS); + + // fold (adde x, const, 1) -> (addc x, const+1) + if (isOneConstant(Carry) && N1C) { + SDLoc DL(N); + EVT VT = RHS.getValueType(); + uint64_t plus1 = N1C->getZExtValue() + 1; + uint64_t max_uint = (1LL<getVTList(), LHS, RHS); + else + return DCI.CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, LHS, RHS), + DAG.getConstant(1, DL, MVT::i32)); + } + // Only ARM and Thumb2 support UMLAL/SMLAL. if (Subtarget->isThumb1Only()) return PerformAddeSubeCombine(N, DCI.DAG, Subtarget); @@ -9926,6 +9991,54 @@ return AddCombineTo64bitUMAAL(N, DCI, Subtarget); } +static SDValue PerformSUBCCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + EVT VT = LHS.getValueType(); + SDLoc DL(N); + + // If the flag result is dead, turn this into an SUB. + if (!N->hasAnyUseOfValue(1)) + return DCI.CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, LHS, RHS), + DAG.getConstant(1, DL, MVT::i32)); + + // fold (subc x, x) -> 0 + no borrow + if (LHS == RHS) + return DCI.CombineTo(N, DAG.getConstant(0, DL, VT), + DAG.getConstant(1, DL, MVT::i32)); + + // fold (subc x, 0) -> x + no borrow + if (isNullConstant(RHS)) + return DCI.CombineTo(N, LHS, DAG.getConstant(1, DL, MVT::i32)); + + // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow + if (isAllOnesConstant(LHS)) + return DCI.CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, RHS, LHS), + DAG.getConstant(1, DL, MVT::i32)); + + // Handle negative subtrahend. + return PerformAddcSubcCombine(N, DAG, Subtarget); +} + +static SDValue PerformSUBECombine(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + + // fold (sube x, y, false) -> (subc x, y) + if (isOneConstant(Carry)) + return DAG.getNode(ARMISD::SUBC, SDLoc(N), N->getVTList(), LHS, RHS); + + return PerformAddeSubeCombine(N, DAG, Subtarget); +} + + /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with /// operands N0 and N1. This is a helper for PerformADDCombine that is /// called with the default operands, and if that fails, with commuted @@ -11758,6 +11871,14 @@ return SDValue(); } +static const APInt *isPowerOf2Constant(SDValue V) { + ConstantSDNode *C = dyn_cast(V); + if (!C) + return nullptr; + const APInt *CV = &C->getAPIntValue(); + return CV->isPowerOf2() ? CV : nullptr; +} + SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const { // If we have a CMOV, OR and AND combination such as: // if (x & CN) @@ -11786,8 +11907,8 @@ SDValue And = CmpZ->getOperand(0); if (And->getOpcode() != ISD::AND) return SDValue(); - ConstantSDNode *AndC = dyn_cast(And->getOperand(1)); - if (!AndC || !AndC->getAPIntValue().isPowerOf2()) + const APInt *AndC = isPowerOf2Constant(And->getOperand(1)); + if (!AndC) return SDValue(); SDValue X = And->getOperand(0); @@ -11827,7 +11948,7 @@ SDValue V = Y; SDLoc dl(X); EVT VT = X.getValueType(); - unsigned BitInX = AndC->getAPIntValue().logBase2(); + unsigned BitInX = AndC->logBase2(); if (BitInX != 0) { // We must shift X first. @@ -11942,6 +12063,75 @@ N->getOperand(3), NewCmp); } + // Materialize a boolean comparison for integers so we can avoid branching. + if (VT.isInteger()) { + if (isNullConstant(FalseVal)) { + if (CC == ARMCC::EQ && isOneConstant(TrueVal)) { + if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) { + // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it + // right 5 bits will make that 32 be 1, otherwise it will be 0. + // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5 + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS); + Res = + DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub), + DAG.getConstant(5, dl, MVT::i32)); + } else { + // ARMISD::SUBC will set the carry only when x == y which means that + // the final ARMISD::ADDE (which uses the carry generated by + // ARMISD::SUBC) will be 1. If x != y then the rightmost SUB will + // give a nonzero unsigned number which will always be higher than 0 + // so the carry won't be set in that case and the two operands of + // ARMISD::ADDE will cancel each other. + // CMOV 0, 1, ==, (CMPZ x, y) -> + // ARMISD::ADDE ((SUB x, y), (ARMISD::SUBC 0, (SUB x, y))) + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS); + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + SDValue Neg = DAG.getNode(ARMISD::SUBC, dl, VTs, FalseVal, Sub); + Res = DAG.getNode(ARMISD::ADDE, dl, VTs, Sub, Neg, Neg.getValue(1)); + } + } else if (CC == ARMCC::NE && LHS != RHS && + (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) { + // This seems pointless but will allow us to combine it further below. + // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y) + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS); + Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc, + N->getOperand(3), Cmp); + } + } else if (isNullConstant(TrueVal)) { + if (CC == ARMCC::EQ && LHS != RHS && + (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) { + // This seems pointless but will allow us to combine it further below + // Note that we change == for != as this is the dual for the case above. + // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y) + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS); + Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal, + DAG.getConstant(ARMCC::NE, dl, MVT::i32), + N->getOperand(3), Cmp); + } + } + + // On Thumb1, the DAG above may be further combined. + // CMOV (SUB x, y), z, !=, (CMPZ x, y) -> + // ARMISD::SUBE ((SUB x, y), (ARMISD::SUBC (SUB x, y), z)) + // to avoid branching. + const APInt *TrueConst; + if (Subtarget->isThumb1Only() && CC == ARMCC::NE && + (FalseVal.getOpcode() == ISD::SUB) && (FalseVal.getOperand(0) == LHS) && + (FalseVal.getOperand(1) == RHS) && + (TrueConst = isPowerOf2Constant(TrueVal))) { + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + unsigned ShAmt = TrueConst->logBase2(); + if (ShAmt) + TrueVal = DAG.getConstant(1, dl, VT); + SDValue Subc = DAG.getNode(ARMISD::SUBC, dl, VTs, FalseVal, TrueVal); + Res = + DAG.getNode(ARMISD::SUBE, dl, VTs, FalseVal, Subc, Subc.getValue(1)); + if (ShAmt) + Res = DAG.getNode(ISD::SHL, dl, VT, Res, + DAG.getConstant(ShAmt, dl, MVT::i32)); + } + } + // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0)) // -> (cmov F T CC CPSR Cmp) if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) { @@ -11979,7 +12169,10 @@ DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; + case ARMISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget); case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget); + case ARMISD::SUBC: return PerformSUBCCombine(N, DCI, Subtarget); + case ARMISD::SUBE: return PerformSUBECombine(N, DCI.DAG, Subtarget); case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget); case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); case ISD::SUB: return PerformSUBCombine(N, DCI); @@ -11987,9 +12180,6 @@ case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); - case ARMISD::ADDC: - case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI.DAG, Subtarget); - case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); Index: test/CodeGen/ARM/atomic-cmpxchg.ll =================================================================== --- test/CodeGen/ARM/atomic-cmpxchg.ll +++ test/CodeGen/ARM/atomic-cmpxchg.ll @@ -16,20 +16,17 @@ ; CHECK-ARM-LABEL: test_cmpxchg_res_i8 ; CHECK-ARM: bl __sync_val_compare_and_swap_1 -; CHECK-ARM: mov [[REG:r[0-9]+]], #0 -; CHECK-ARM: cmp r0, {{r[0-9]+}} -; CHECK-ARM: moveq [[REG]], #1 -; CHECK-ARM: mov r0, [[REG]] +; CHECK-ARM: sub r0, r0, {{r[0-9]+}} +; CHECK-ARM: rsbs [[REG:r[0-9]+]], r0, #0 +; CHECK-ARM: adc r0, r0, [[REG]] ; CHECK-THUMB-LABEL: test_cmpxchg_res_i8 ; CHECK-THUMB: bl __sync_val_compare_and_swap_1 ; CHECK-THUMB-NOT: mov [[R1:r[0-7]]], r0 -; CHECK-THUMB: movs [[R1:r[0-7]]], r0 -; CHECK-THUMB: movs r0, #1 -; CHECK-THUMB: movs [[R2:r[0-9]+]], #0 -; CHECK-THUMB: cmp [[R1]], {{r[0-9]+}} -; CHECK-THUMB: beq -; CHECK-THUMB: movs r0, [[R2]] +; CHECK-THUMB: subs [[R1:r[0-7]]], r0, {{r[0-9]+}} +; CHECK-THUMB: movs r0, #0 +; CHECK-THUMB: subs r0, r0, [[R1]] +; CHECK-THUMB: adcs r0, [[R1]] ; CHECK-ARMV6-LABEL: test_cmpxchg_res_i8: ; CHECK-ARMV6-NEXT: .fnstart @@ -48,14 +45,10 @@ ; CHECK-THUMBV6-LABEL: test_cmpxchg_res_i8: ; CHECK-THUMBV6: mov [[EXPECTED:r[0-9]+]], r1 ; CHECK-THUMBV6-NEXT: bl __sync_val_compare_and_swap_1 -; CHECK-THUMBV6-NEXT: mov [[RES:r[0-9]+]], r0 -; CHECK-THUMBV6-NEXT: movs r0, #1 -; CHECK-THUMBV6-NEXT: movs [[ZERO:r[0-9]+]], #0 -; CHECK-THUMBV6-NEXT: cmp [[RES]], [[EXPECTED]] -; CHECK-THUMBV6-NEXT: beq [[END:.LBB[0-9_]+]] -; CHECK-THUMBV6-NEXT: mov r0, [[ZERO]] -; CHECK-THUMBV6-NEXT: [[END]]: -; CHECK-THUMBV6-NEXT: pop {{.*}}pc} +; CHECK-THUMBV6-NEXT: subs [[R1:r[0-7]]], r0, {{r[0-9]+}} +; CHECK-THUMBV6-NEXT: movs r0, #0 +; CHECK-THUMBV6-NEXT: subs r0, r0, [[R1]] +; CHECK-THUMBV6-NEXT: adcs r0, [[R1]] ; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8: ; CHECK-ARMV7-NEXT: .fnstart Index: test/CodeGen/ARM/cmn.ll =================================================================== --- test/CodeGen/ARM/cmn.ll +++ test/CodeGen/ARM/cmn.ll @@ -7,8 +7,8 @@ ; CHECK-NOT: mvn ; CHECK: cmn %cmp = icmp sgt i32 %a, -78 - %. = zext i1 %cmp to i32 - ret i32 %. + %ret = select i1 %cmp, i32 42, i32 24 + ret i32 %ret } define i32 @compare_r_eq(i32 %a, i32 %b) { @@ -17,6 +17,6 @@ ; CHECK: cmn %sub = sub nsw i32 0, %b %cmp = icmp eq i32 %a, %sub - %. = zext i1 %cmp to i32 - ret i32 %. + %ret = select i1 %cmp, i32 42, i32 24 + ret i32 %ret } Index: test/CodeGen/ARM/cmpxchg-O0.ll =================================================================== --- test/CodeGen/ARM/cmpxchg-O0.ll +++ test/CodeGen/ARM/cmpxchg-O0.ll @@ -18,8 +18,10 @@ ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: -; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]] -; CHECK: {{moveq|movweq}} {{r[0-9]+}}, #1 +; Materialisation of a boolean is done with sub/clz/lsr +; CHECK: sub{{(s)?}} [[CMP1:r[0-9]+]], [[OLD]], [[DESIRED]] +; CHECK: clz [[CMP2:r[0-9]+]], [[CMP1]] +; CHECK: lsr{{(s)?}} {{r[0-9]+}}, [[CMP2]], #5 ; CHECK: dmb ish %res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic ret { i8, i1 } %res @@ -38,8 +40,10 @@ ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: -; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]] -; CHECK: {{moveq|movweq}} {{r[0-9]+}}, #1 +; Materialisation of a boolean is done with sub/clz/lsr +; CHECK: sub{{(s)?}} [[CMP1:r[0-9]+]], [[OLD]], [[DESIRED]] +; CHECK: clz [[CMP2:r[0-9]+]], [[CMP1]] +; CHECK: lsr{{(s)?}} {{r[0-9]+}}, [[CMP2]], #5 ; CHECK: dmb ish %res = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst monotonic ret { i16, i1 } %res @@ -58,8 +62,10 @@ ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: -; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]] -; CHECK: {{moveq|movweq}} {{r[0-9]+}}, #1 +; Materialisation of a boolean is done with sub/clz/lsr +; CHECK: sub{{(s)?}} [[CMP1:r[0-9]+]], [[OLD]], [[DESIRED]] +; CHECK: clz [[CMP2:r[0-9]+]], [[CMP1]] +; CHECK: lsr{{(s)?}} {{r[0-9]+}}, [[CMP2]], #5 ; CHECK: dmb ish %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic ret { i32, i1 } %res Index: test/CodeGen/ARM/fp16-promote.ll =================================================================== --- test/CodeGen/ARM/fp16-promote.ll +++ test/CodeGen/ARM/fp16-promote.ll @@ -170,8 +170,10 @@ ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-VFP: vcmp.f32 ; CHECK-NOVFP: bl __aeabi_fcmpeq -; CHECK-FP16: vmrs APSR_nzcv, fpscr -; CHECK-ALL: movw{{ne|eq}} +; CHECK-VFP-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-VFP-NEXT: movwne +; CHECK-NOVFP-NEXT: clz r0, r0 +; CHECK-NOVFP-NEXT: lsr r0, r0, #5 define i1 @test_fcmp_une(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 %b = load half, half* %q, align 2 Index: test/CodeGen/ARM/long-setcc.ll =================================================================== --- test/CodeGen/ARM/long-setcc.ll +++ test/CodeGen/ARM/long-setcc.ll @@ -15,6 +15,5 @@ ret i1 %tmp } -; CHECK: cmp ; CHECK-NOT: cmp Index: test/CodeGen/ARM/select-imm.ll =================================================================== --- test/CodeGen/ARM/select-imm.ll +++ test/CodeGen/ARM/select-imm.ll @@ -3,9 +3,15 @@ ; RUN: llc -mtriple=arm-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \ ; RUN: | FileCheck %s --check-prefix=ARMT2 +; RUN: llc -mtriple=thumb-eabi -mcpu=cortex-m0 %s -o - \ +; RUN: | FileCheck %s --check-prefix=THUMB1 + ; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \ ; RUN: | FileCheck %s --check-prefix=THUMB2 +; RUN: llc -mtriple=thumbv8m.base-eabi %s -o - \ +; RUN: | FileCheck %s --check-prefix=V8MBASE + define i32 @t1(i32 %c) nounwind readnone { entry: ; ARM-LABEL: t1: @@ -17,6 +23,10 @@ ; ARMT2: movw [[R:r[0-1]]], #357 ; ARMT2: movwgt [[R]], #123 +; THUMB1-LABEL: t1: +; THUMB1: cmp r{{[0-9]+}}, #1 +; THUMB1: bgt + ; THUMB2-LABEL: t1: ; THUMB2: movw [[R:r[0-1]]], #357 ; THUMB2: movgt [[R]], #123 @@ -37,6 +47,10 @@ ; ARMT2: mov [[R:r[0-1]]], #123 ; ARMT2: movwgt [[R]], #357 +; THUMB1-LABEL: t2: +; THUMB1: cmp r{{[0-9]+}}, #1 +; THUMB1: bgt + ; THUMB2-LABEL: t2: ; THUMB2: mov{{(s|\.w)}} [[R:r[0-1]]], #123 ; THUMB2: movwgt [[R]], #357 @@ -49,16 +63,21 @@ define i32 @t3(i32 %a) nounwind readnone { entry: ; ARM-LABEL: t3: -; ARM: mov [[R:r[0-1]]], #0 -; ARM: moveq [[R]], #1 +; ARM: rsbs r1, r0, #0 +; ARM: adc r0, r0, r1 ; ARMT2-LABEL: t3: -; ARMT2: mov [[R:r[0-1]]], #0 -; ARMT2: movweq [[R]], #1 +; ARMT2: clz r0, r0 +; ARMT2: lsr r0, r0, #5 + +; THUMB1-LABEL: t3: +; THUMB1: movs r1, #0 +; THUMB1: subs r1, r1, r0 +; THUMB1: adcs r0, r1 ; THUMB2-LABEL: t3: -; THUMB2: mov{{(s|\.w)}} [[R:r[0-1]]], #0 -; THUMB2: moveq [[R]], #1 +; THUMB2: clz r0, r0 +; THUMB2: lsrs r0, r0, #5 %0 = icmp eq i32 %a, 160 %1 = zext i1 %0 to i32 ret i32 %1 @@ -74,6 +93,10 @@ ; ARMT2: movwlt [[R0:r[0-9]+]], #65365 ; ARMT2: movtlt [[R0]], #65365 +; THUMB1-LABEL: t4: +; THUMB1: cmp r{{[0-9]+}}, r{{[0-9]+}} +; THUMB1: blt + ; THUMB2-LABEL: t4: ; THUMB2: mvnlt [[R0:r[0-9]+]], #11141290 %0 = icmp slt i32 %a, %b @@ -86,15 +109,23 @@ entry: ; ARM-LABEL: t5: ; ARM-NOT: mov -; ARM: cmp r0, #1 +; ARM: sub r0, r0, #1 ; ARM-NOT: mov -; ARM: movne r0, #0 +; ARM: rsbs r1, r0, #0 +; ARM: adc r0, r0, r1 + +; THUMB1-LABEL: t5: +; THUMB1-NOT: bne +; THUMB1: movs r0, #0 +; THUMB1: subs r0, r0, r1 +; THUMB1: adcs r0, r1 ; THUMB2-LABEL: t5: ; THUMB2-NOT: mov -; THUMB2: cmp r0, #1 -; THUMB2: it ne -; THUMB2: movne r0, #0 +; THUMB2: subs r0, #1 +; THUMB2: clz r0, r0 +; THUMB2: lsrs r0, r0, #5 + %cmp = icmp eq i32 %a, 1 %conv = zext i1 %cmp to i32 ret i32 %conv @@ -107,6 +138,10 @@ ; ARM: cmp r0, #0 ; ARM: movne r0, #1 +; THUMB1-LABEL: t6: +; THUMB1: cmp r{{[0-9]+}}, #0 +; THUMB1: bne + ; THUMB2-LABEL: t6: ; THUMB2-NOT: mov ; THUMB2: cmp r0, #0 @@ -116,3 +151,204 @@ %lnot.ext = zext i1 %tobool to i32 ret i32 %lnot.ext } + +define i32 @t7(i32 %a, i32 %b) nounwind readnone { +entry: +; ARM-LABEL: t7: +; ARM: subs r0, r0, r1 +; ARM: movne r0, #4 + +; ARMT2-LABEL: t7: +; ARMT2: subs r0, r0, r1 +; ARMT2: movwne r0, #4 + +; THUMB1-LABEL: t7: +; THUMB1: subs r0, r0, r1 +; THUMB1: subs r1, r0, #1 +; THUMB1: sbcs r0, r1 +; THUMB1: lsls r0, r0, #2 + +; THUMB2-LABEL: t7: +; THUMB2: subs r0, r0, r1 +; THUMB2: it ne +; THUMB2: movne r0, #4 + %0 = icmp ne i32 %a, %b + %1 = select i1 %0, i32 4, i32 0 + ret i32 %1 +} + +define void @t8(i32 %a) { +entry: + +; ARM scheduler emits icmp/zext before both calls, so isn't relevant + +; ARMT2-LABEL: t8: +; ARMT2: bl t7 +; ARMT2: mov r1, r0 +; ARMT2: sub r0, r4, #5 +; ARMT2: clz r0, r0 +; ARMT2: lsr r0, r0, #5 + +; THUMB1-LABEL: t8: +; THUMB1: bl t7 +; THUMB1: mov r1, r0 +; THUMB1: subs r2, r4, #5 +; THUMB1: movs r0, #0 +; THUMB1: subs r0, r0, r2 +; THUMB1: adcs r0, r2 + +; THUMB2-LABEL: t8: +; THUMB2: bl t7 +; THUMB2: mov r1, r0 +; THUMB2: subs r0, r4, #5 +; THUMB2: clz r0, r0 +; THUMB2: lsrs r0, r0, #5 + + %cmp = icmp eq i32 %a, 5 + %conv = zext i1 %cmp to i32 + %call = tail call i32 @t7(i32 9, i32 %a) + tail call i32 @t7(i32 %conv, i32 %call) + ret void +} + +define void @t9(i8* %a, i8 %b) { +entry: + +; ARM scheduler emits icmp/zext before both calls, so isn't relevant + +; ARMT2-LABEL: t9: +; ARMT2: bl f +; ARMT2: uxtb r0, r4 +; ARMT2: cmp r0, r0 +; ARMT2: add r1, r4, #1 +; ARMT2: mov r2, r0 +; ARMT2: add r2, r2, #1 +; ARMT2: add r1, r1, #1 +; ARMT2: uxtb r3, r2 +; ARMT2: cmp r3, r0 + +; THUMB1-LABEL: t9: +; THUMB1: bl f +; THUMB1: sxtb r1, r4 +; THUMB1: uxtb r0, r1 +; THUMB1: cmp r0, r0 +; THUMB1: adds r1, r1, #1 +; THUMB1: mov r2, r0 +; THUMB1: adds r1, r1, #1 +; THUMB1: adds r2, r2, #1 +; THUMB1: uxtb r3, r2 +; THUMB1: cmp r3, r0 + +; THUMB2-LABEL: t9: +; THUMB2: bl f +; THUMB2: uxtb r0, r4 +; THUMB2: cmp r0, r0 +; THUMB2: adds r1, r4, #1 +; THUMB2: mov r2, r0 +; THUMB2: adds r2, #1 +; THUMB2: adds r1, #1 +; THUMB2: uxtb r3, r2 +; THUMB2: cmp r3, r0 + + %0 = load i8, i8* %a + %conv = sext i8 %0 to i32 + %conv119 = zext i8 %0 to i32 + %conv522 = and i32 %conv, 255 + %cmp723 = icmp eq i32 %conv522, %conv119 + tail call void @f(i1 zeroext %cmp723) + br i1 %cmp723, label %while.body, label %while.end + +while.body: ; preds = %entry, %while.body + %ref.025 = phi i8 [ %inc9, %while.body ], [ %0, %entry ] + %in.024 = phi i32 [ %inc, %while.body ], [ %conv, %entry ] + %inc = add i32 %in.024, 1 + %inc9 = add i8 %ref.025, 1 + %conv1 = zext i8 %inc9 to i32 + %cmp = icmp slt i32 %conv1, %conv119 + %conv5 = and i32 %inc, 255 + br i1 %cmp, label %while.body, label %while.end + +while.end: + ret void +} + +declare void @f(i1 zeroext) + + +define i1 @t10() { +entry: + %q = alloca i32 + %p = alloca i32 + store i32 -3, i32* %q + store i32 -8, i32* %p + %0 = load i32, i32* %q + %1 = load i32, i32* %p + %div = sdiv i32 %0, %1 + %mul = mul nsw i32 %div, %1 + %rem = srem i32 %0, %1 + %add = add nsw i32 %mul, %rem + %cmp = icmp eq i32 %add, %0 + ret i1 %cmp + +; ARM-LABEL: t10: +; ARM: rsbs r1, r0, #0 +; ARM: adc r0, r0, r1 + +; ARMT2-LABEL: t10: +; ARMT2: clz r0, r0 +; ARMT2: lsr r0, r0, #5 + +; THUMB1-LABEL: t10: +; THUMB1: movs r0, #0 +; THUMB1: subs r0, r0, r1 +; THUMB1: adcs r0, r1 + +; THUMB2-LABEL: t10: +; THUMB2: clz r0, r0 +; THUMB2: lsrs r0, r0, #5 + +; V8MBASE-LABEL: t10: +; V8MBASE-NOT: movs r0, #0 +; V8MBASE: movs r0, #1 +} + +define i1 @t11() { +entry: + %bit = alloca i32 + %load = load i32, i32* %bit + %clear = and i32 %load, -4096 + %set = or i32 %clear, 33 + store i32 %set, i32* %bit + %load1 = load i32, i32* %bit + %clear2 = and i32 %load1, -33550337 + %set3 = or i32 %clear2, 40960 + %clear5 = and i32 %set3, 4095 + %rem = srem i32 %clear5, 10 + %clear9 = and i32 %set3, -4096 + %set10 = or i32 %clear9, %rem + store i32 %set10, i32* %bit + %clear12 = and i32 %set10, 4095 + %cmp = icmp eq i32 %clear12, 3 + ret i1 %cmp + +; ARM-LABEL: t11: +; ARM: rsbs r1, r0, #0 +; ARM: adc r0, r0, r1 + +; ARMT2-LABEL: t11: +; ARMT2: clz r0, r0 +; ARMT2: lsr r0, r0, #5 + +; THUMB1-LABEL: t11: +; THUMB1-NOT: movs r0, #0 +; THUMB1: movs r0, #1 + +; THUMB2-LABEL: t11: +; THUMB2: clz r0, r0 +; THUMB2: lsrs r0, r0, #5 + +; V8MBASE-LABEL: t11: +; V8MBASE-NOT: movs r0, #0 +; V8MBASE: movs r0, #1 +} + Index: test/CodeGen/ARM/setcc-logic.ll =================================================================== --- test/CodeGen/ARM/setcc-logic.ll +++ test/CodeGen/ARM/setcc-logic.ll @@ -20,12 +20,12 @@ define zeroext i1 @and_eq(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK-LABEL: and_eq: ; CHECK: @ BB#0: -; CHECK-NEXT: eor r2, r2, r3 -; CHECK-NEXT: eor r0, r0, r1 -; CHECK-NEXT: orrs r0, r0, r2 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: movweq r0, #1 -; CHECK-NEXT: bx lr +; CHECK: eor r2, r2, r3 +; CHECK: eor r0, r0, r1 +; CHECK: orr r0, r0, r2 +; CHECK: clz r0, r0 +; CHECK: lsr r0, r0, #5 +; CHECK: bx lr %cmp1 = icmp eq i32 %a, %b %cmp2 = icmp eq i32 %c, %d %and = and i1 %cmp1, %cmp2 Index: test/CodeGen/Thumb/branchless-cmp.ll =================================================================== --- /dev/null +++ test/CodeGen/Thumb/branchless-cmp.ll @@ -0,0 +1,104 @@ +; RUN: llc -mtriple=thumb-eabi -mcpu=cortex-m0 %s -verify-machineinstrs -o - | FileCheck %s + +define i32 @test1a(i32 %a, i32 %b) { +entry: + %cmp = icmp ne i32 %a, %b + %cond = zext i1 %cmp to i32 + ret i32 %cond +; CHECK-LABEL: test1a: +; CHECK: subs r0, r0, r1 +; CHECK-NEXT: subs r1, r0, #1 +; CHECK-NEXT: sbcs r0, r1 +} + +define i32 @test1b(i32 %a, i32 %b) { +entry: + %cmp = icmp eq i32 %a, %b + %cond = zext i1 %cmp to i32 + ret i32 %cond +; CHECK-LABEL: test1b: +; CHECK: subs r1, r0, r1 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: subs r0, r0, r1 +; CHECK-NEXT: adcs r0, r1 +} + +define i32 @test2a(i32 %a, i32 %b) { +entry: + %cmp = icmp eq i32 %a, %b + %cond = zext i1 %cmp to i32 + ret i32 %cond +; CHECK-LABEL: test2a: +; CHECK: subs r1, r0, r1 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: subs r0, r0, r1 +; CHECK-NEXT: adcs r0, r1 +} + +define i32 @test2b(i32 %a, i32 %b) { +entry: + %cmp = icmp ne i32 %a, %b + %cond = zext i1 %cmp to i32 + ret i32 %cond +; CHECK-LABEL: test2b: +; CHECK: subs r0, r0, r1 +; CHECK-NEXT: subs r1, r0, #1 +; CHECK-NEXT: sbcs r0, r1 +} + +define i32 @test3a(i32 %a, i32 %b) { +entry: + %cmp = icmp eq i32 %a, %b + %cond = select i1 %cmp, i32 0, i32 4 + ret i32 %cond +; CHECK-LABEL: test3a: +; CHECK: subs r0, r0, r1 +; CHECK-NEXT: subs r1, r0, #1 +; CHECK-NEXT: sbcs r0, r1 +; CHECK-NEXT: lsls r0, r0, #2 +} + +; This one hasn't changed actually. +define i32 @test3b(i32 %a, i32 %b) { +entry: + %cmp = icmp eq i32 %a, %b + %cond = select i1 %cmp, i32 4, i32 0 + ret i32 %cond +; CHECK-LABEL: test3b: +; CHECK: mov r2, r0 +; CHECK-NEXT: movs r0, #4 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: cmp r2, r1 +; CHECK-NEXT: beq .[[BRANCH:[A-Z0-9_]+]] +; CHECK: mov r0, r3 +; CHECK: .[[BRANCH]]: +} + +; This one hasn't changed actually. +define i32 @test4a(i32 %a, i32 %b) { +entry: + %cmp = icmp ne i32 %a, %b + %cond = select i1 %cmp, i32 0, i32 4 + ret i32 %cond +; CHECK-LABEL: test4a: +; CHECK: mov r2, r0 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: movs r3, #4 +; CHECK-NEXT: cmp r2, r1 +; CHECK-NEXT: bne .[[BRANCH:[A-Z0-9_]+]] +; CHECK: mov r0, r3 +; CHECK: .[[BRANCH]]: +} + +define i32 @test4b(i32 %a, i32 %b) { +entry: + %cmp = icmp ne i32 %a, %b + %cond = select i1 %cmp, i32 4, i32 0 + ret i32 %cond +; CHECK-LABEL: test4b: +; CHECK: subs r0, r0, r1 +; CHECK-NEXT: subs r1, r0, #1 +; CHECK-NEXT: sbcs r0, r1 +; CHECK-NEXT: lsls r0, r0, #2 +} + Index: test/CodeGen/Thumb/constants.ll =================================================================== --- test/CodeGen/Thumb/constants.ll +++ test/CodeGen/Thumb/constants.ll @@ -17,3 +17,18 @@ define i32 @mov_and_add2() { ret i32 511 } + +; CHECK-T1-LABEL: @test64 +; CHECK-T2-LABEL: @test64 +; CHECK-T1: movs r0, #15 +; CHECK-T1-NEXT: mvns r0, r0 +; CHECK-T2: subs.w r0, r{{[0-9]+}}, #15 +; CHECK-T2-NEXT: sbc r1, r{{[0-9]+}}, #0 +define i32 @test64() { +entry: + tail call void @fn1(i64 -1) + tail call void @fn1(i64 -1) + tail call void @fn1(i64 -16) + ret i32 0 +} +declare void @fn1(i64) ; Index: test/CodeGen/Thumb/long-setcc.ll =================================================================== --- test/CodeGen/Thumb/long-setcc.ll +++ test/CodeGen/Thumb/long-setcc.ll @@ -15,8 +15,5 @@ ret i1 %tmp } -; CHECK: cmp ; CHECK-NOT: cmp - - Index: test/CodeGen/Thumb2/float-cmp.ll =================================================================== --- test/CodeGen/Thumb2/float-cmp.ll +++ test/CodeGen/Thumb2/float-cmp.ll @@ -69,7 +69,8 @@ ; HARD: movvc r0, #1 %1 = fcmp ord float %a, %b ret i1 %1 -}define i1 @cmp_f_ueq(float %a, float %b) { +} +define i1 @cmp_f_ueq(float %a, float %b) { ; CHECK-LABEL: cmp_f_ueq: ; NONE: bl __aeabi_fcmpeq ; NONE: bl __aeabi_fcmpun @@ -82,8 +83,8 @@ define i1 @cmp_f_ugt(float %a, float %b) { ; CHECK-LABEL: cmp_f_ugt: ; NONE: bl __aeabi_fcmple -; NONE: cmp r0, #0 -; NONE-NEXT: it eq +; NONE-NEXT: clz r0, r0 +; NONE-NEXT: lsrs r0, r0, #5 ; HARD: vcmpe.f32 ; HARD: movhi r0, #1 %1 = fcmp ugt float %a, %b @@ -92,8 +93,8 @@ define i1 @cmp_f_uge(float %a, float %b) { ; CHECK-LABEL: cmp_f_uge: ; NONE: bl __aeabi_fcmplt -; NONE: cmp r0, #0 -; NONE-NEXT: it eq +; NONE-NEXT: clz r0, r0 +; NONE-NEXT: lsrs r0, r0, #5 ; HARD: vcmpe.f32 ; HARD: movpl r0, #1 %1 = fcmp uge float %a, %b @@ -102,8 +103,8 @@ define i1 @cmp_f_ult(float %a, float %b) { ; CHECK-LABEL: cmp_f_ult: ; NONE: bl __aeabi_fcmpge -; NONE: cmp r0, #0 -; NONE-NEXT: it eq +; NONE-NEXT: clz r0, r0 +; NONE-NEXT: lsrs r0, r0, #5 ; HARD: vcmpe.f32 ; HARD: movlt r0, #1 %1 = fcmp ult float %a, %b @@ -112,8 +113,8 @@ define i1 @cmp_f_ule(float %a, float %b) { ; CHECK-LABEL: cmp_f_ule: ; NONE: bl __aeabi_fcmpgt -; NONE: cmp r0, #0 -; NONE-NEXT: it eq +; NONE-NEXT: clz r0, r0 +; NONE-NEXT: lsrs r0, r0, #5 ; HARD: vcmpe.f32 ; HARD: movle r0, #1 %1 = fcmp ule float %a, %b Index: test/CodeGen/Thumb2/thumb2-cmn.ll =================================================================== --- test/CodeGen/Thumb2/thumb2-cmn.ll +++ test/CodeGen/Thumb2/thumb2-cmn.ll @@ -3,72 +3,80 @@ ; These tests could be improved by 'movs r0, #0' being rematerialized below the ; test as 'mov.w r0, #0'. -define i1 @f1(i32 %a, i32 %b) { +define i32 @f1(i32 %a, i32 %b) { %nb = sub i32 0, %b %tmp = icmp ne i32 %a, %nb - ret i1 %tmp + %ret = select i1 %tmp, i32 42, i32 24 + ret i32 %ret } ; CHECK-LABEL: f1: ; CHECK: cmn {{.*}}, r1 -define i1 @f2(i32 %a, i32 %b) { +define i32 @f2(i32 %a, i32 %b) { %nb = sub i32 0, %b %tmp = icmp ne i32 %nb, %a - ret i1 %tmp + %ret = select i1 %tmp, i32 42, i32 24 + ret i32 %ret } ; CHECK-LABEL: f2: ; CHECK: cmn {{.*}}, r1 -define i1 @f3(i32 %a, i32 %b) { +define i32 @f3(i32 %a, i32 %b) { %nb = sub i32 0, %b %tmp = icmp eq i32 %a, %nb - ret i1 %tmp + %ret = select i1 %tmp, i32 42, i32 24 + ret i32 %ret } ; CHECK-LABEL: f3: ; CHECK: cmn {{.*}}, r1 -define i1 @f4(i32 %a, i32 %b) { +define i32 @f4(i32 %a, i32 %b) { %nb = sub i32 0, %b %tmp = icmp eq i32 %nb, %a - ret i1 %tmp + %ret = select i1 %tmp, i32 42, i32 24 + ret i32 %ret } ; CHECK-LABEL: f4: ; CHECK: cmn {{.*}}, r1 -define i1 @f5(i32 %a, i32 %b) { +define i32 @f5(i32 %a, i32 %b) { %tmp = shl i32 %b, 5 %nb = sub i32 0, %tmp %tmp1 = icmp eq i32 %nb, %a - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } ; CHECK-LABEL: f5: ; CHECK: cmn.w {{.*}}, r1, lsl #5 -define i1 @f6(i32 %a, i32 %b) { +define i32 @f6(i32 %a, i32 %b) { %tmp = lshr i32 %b, 6 %nb = sub i32 0, %tmp %tmp1 = icmp ne i32 %nb, %a - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } ; CHECK-LABEL: f6: ; CHECK: cmn.w {{.*}}, r1, lsr #6 -define i1 @f7(i32 %a, i32 %b) { +define i32 @f7(i32 %a, i32 %b) { %tmp = ashr i32 %b, 7 %nb = sub i32 0, %tmp %tmp1 = icmp eq i32 %a, %nb - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } ; CHECK-LABEL: f7: ; CHECK: cmn.w {{.*}}, r1, asr #7 -define i1 @f8(i32 %a, i32 %b) { +define i32 @f8(i32 %a, i32 %b) { %l8 = shl i32 %a, 24 %r8 = lshr i32 %a, 8 %tmp = or i32 %l8, %r8 %nb = sub i32 0, %tmp %tmp1 = icmp ne i32 %a, %nb - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } ; CHECK-LABEL: f8: ; CHECK: cmn.w {{.*}}, {{.*}}, ror #8 Index: test/CodeGen/Thumb2/thumb2-cmn2.ll =================================================================== --- test/CodeGen/Thumb2/thumb2-cmn2.ll +++ test/CodeGen/Thumb2/thumb2-cmn2.ll @@ -1,33 +1,37 @@ ; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s ; -0x000000bb = 4294967109 -define i1 @f1(i32 %a) { +define i32 @f1(i32 %a) { ; CHECK-LABEL: f1: ; CHECK: adds {{r.*}}, #187 %tmp = icmp ne i32 %a, 4294967109 - ret i1 %tmp + %ret = select i1 %tmp, i32 42, i32 24 + ret i32 %ret } ; -0x00aa00aa = 4283826006 -define i1 @f2(i32 %a) { +define i32 @f2(i32 %a) { ; CHECK-LABEL: f2: ; CHECK: cmn.w {{r.*}}, #11141290 %tmp = icmp eq i32 %a, 4283826006 - ret i1 %tmp + %ret = select i1 %tmp, i32 42, i32 24 + ret i32 %ret } ; -0xcc00cc00 = 872363008 -define i1 @f3(i32 %a) { +define i32 @f3(i32 %a) { ; CHECK-LABEL: f3: ; CHECK: cmn.w {{r.*}}, #-872363008 %tmp = icmp ne i32 %a, 872363008 - ret i1 %tmp + %ret = select i1 %tmp, i32 42, i32 24 + ret i32 %ret } ; -0x00110000 = 4293853184 -define i1 @f4(i32 %a) { +define i32 @f4(i32 %a) { ; CHECK-LABEL: f4: ; CHECK: cmn.w {{r.*}}, #1114112 %tmp = icmp eq i32 %a, 4293853184 - ret i1 %tmp + %ret = select i1 %tmp, i32 42, i32 24 + ret i32 %ret } Index: test/CodeGen/Thumb2/thumb2-cmp.ll =================================================================== --- test/CodeGen/Thumb2/thumb2-cmp.ll +++ test/CodeGen/Thumb2/thumb2-cmp.ll @@ -4,43 +4,48 @@ ; test as 'mov.w r0, #0'. ; 0x000000bb = 187 -define i1 @f1(i32 %a) { +define i32 @f1(i32 %a) { ; CHECK-LABEL: f1: ; CHECK: cmp {{.*}}, #187 %tmp = icmp ne i32 %a, 187 - ret i1 %tmp + %ret = select i1 %tmp, i32 42, i32 24 + ret i32 %ret } ; 0x00aa00aa = 11141290 -define i1 @f2(i32 %a) { +define i32 @f2(i32 %a) { ; CHECK-LABEL: f2: ; CHECK: cmp.w {{.*}}, #11141290 %tmp = icmp eq i32 %a, 11141290 - ret i1 %tmp + %ret = select i1 %tmp, i32 42, i32 24 + ret i32 %ret } ; 0xcc00cc00 = 3422604288 -define i1 @f3(i32 %a) { +define i32 @f3(i32 %a) { ; CHECK-LABEL: f3: ; CHECK: cmp.w {{.*}}, #-872363008 %tmp = icmp ne i32 %a, 3422604288 - ret i1 %tmp + %ret = select i1 %tmp, i32 42, i32 24 + ret i32 %ret } ; 0xdddddddd = 3722304989 -define i1 @f4(i32 %a) { +define i32 @f4(i32 %a) { ; CHECK-LABEL: f4: ; CHECK: cmp.w {{.*}}, #-572662307 %tmp = icmp ne i32 %a, 3722304989 - ret i1 %tmp + %ret = select i1 %tmp, i32 42, i32 24 + ret i32 %ret } ; 0x00110000 = 1114112 -define i1 @f5(i32 %a) { +define i32 @f5(i32 %a) { ; CHECK-LABEL: f5: ; CHECK: cmp.w {{.*}}, #1114112 %tmp = icmp eq i32 %a, 1114112 - ret i1 %tmp + %ret = select i1 %tmp, i32 42, i32 24 + ret i32 %ret } ; Check that we don't do an invalid (a > b) --> !(a < b + 1) transform. Index: test/CodeGen/Thumb2/thumb2-cmp2.ll =================================================================== --- test/CodeGen/Thumb2/thumb2-cmp2.ll +++ test/CodeGen/Thumb2/thumb2-cmp2.ll @@ -3,50 +3,56 @@ ; These tests would be improved by 'movs r0, #0' being rematerialized below the ; test as 'mov.w r0, #0'. -define i1 @f1(i32 %a, i32 %b) { +define i32 @f1(i32 %a, i32 %b) { ; CHECK-LABEL: f1: ; CHECK: cmp {{.*}}, r1 %tmp = icmp ne i32 %a, %b - ret i1 %tmp + %ret = select i1 %tmp, i32 42, i32 24 + ret i32 %ret } -define i1 @f2(i32 %a, i32 %b) { +define i32 @f2(i32 %a, i32 %b) { ; CHECK-LABEL: f2: ; CHECK: cmp {{.*}}, r1 %tmp = icmp eq i32 %a, %b - ret i1 %tmp + %ret = select i1 %tmp, i32 42, i32 24 + ret i32 %ret } -define i1 @f6(i32 %a, i32 %b) { +define i32 @f6(i32 %a, i32 %b) { ; CHECK-LABEL: f6: ; CHECK: cmp.w {{.*}}, r1, lsl #5 %tmp = shl i32 %b, 5 %tmp1 = icmp eq i32 %tmp, %a - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } -define i1 @f7(i32 %a, i32 %b) { +define i32 @f7(i32 %a, i32 %b) { ; CHECK-LABEL: f7: ; CHECK: cmp.w {{.*}}, r1, lsr #6 %tmp = lshr i32 %b, 6 %tmp1 = icmp ne i32 %tmp, %a - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } -define i1 @f8(i32 %a, i32 %b) { +define i32 @f8(i32 %a, i32 %b) { ; CHECK-LABEL: f8: ; CHECK: cmp.w {{.*}}, r1, asr #7 %tmp = ashr i32 %b, 7 %tmp1 = icmp eq i32 %a, %tmp - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } -define i1 @f9(i32 %a, i32 %b) { +define i32 @f9(i32 %a, i32 %b) { ; CHECK-LABEL: f9: ; CHECK: cmp.w {{.*}}, {{.*}}, ror #8 %l8 = shl i32 %a, 24 %r8 = lshr i32 %a, 8 %tmp = or i32 %l8, %r8 %tmp1 = icmp ne i32 %a, %tmp - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } Index: test/CodeGen/Thumb2/thumb2-teq.ll =================================================================== --- test/CodeGen/Thumb2/thumb2-teq.ll +++ test/CodeGen/Thumb2/thumb2-teq.ll @@ -4,54 +4,107 @@ ; test as 'mov.w r0, #0'. ; 0x000000bb = 187 -define i1 @f2(i32 %a) { +define i32 @f2(i32 %a) { %tmp = xor i32 %a, 187 %tmp1 = icmp eq i32 0, %tmp - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } ; CHECK-LABEL: f2: ; CHECK: teq.w {{.*}}, #187 ; 0x00aa00aa = 11141290 -define i1 @f3(i32 %a) { +define i32 @f3(i32 %a) { %tmp = xor i32 %a, 11141290 %tmp1 = icmp eq i32 %tmp, 0 - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } ; CHECK-LABEL: f3: ; CHECK: teq.w {{.*}}, #11141290 ; 0xcc00cc00 = 3422604288 -define i1 @f6(i32 %a) { +define i32 @f6(i32 %a) { %tmp = xor i32 %a, 3422604288 %tmp1 = icmp eq i32 0, %tmp - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } ; CHECK-LABEL: f6: ; CHECK: teq.w {{.*}}, #-872363008 ; 0xdddddddd = 3722304989 -define i1 @f7(i32 %a) { +define i32 @f7(i32 %a) { %tmp = xor i32 %a, 3722304989 %tmp1 = icmp eq i32 %tmp, 0 - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } ; CHECK-LABEL: f7: ; CHECK: teq.w {{.*}}, #-572662307 +; 0x00110000 = 1114112 +define i32 @f10(i32 %a) { + %tmp = xor i32 %a, 1114112 + %tmp1 = icmp eq i32 0, %tmp + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret +} +; CHECK-LABEL: f10: +; CHECK: teq.w {{.*}}, #1114112 + +; 0x000000bb = 187 +define i1 @f12(i32 %a) { + %tmp = xor i32 %a, 187 + %tmp1 = icmp eq i32 0, %tmp + ret i1 %tmp1 +} +; CHECK-LABEL: f12: +; CHECK: eor r0, r0, #187 +; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: lsrs r0, r0, #5 + +; 0x00aa00aa = 11141290 +define i1 @f13(i32 %a) { + %tmp = xor i32 %a, 11141290 + %tmp1 = icmp eq i32 %tmp, 0 + ret i1 %tmp1 +} +; CHECK-LABEL: f13: +; CHECK: eor r0, r0, #11141290 +; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: lsrs r0, r0, #5 + +; 0xcc00cc00 = 3422604288 +define i1 @f16(i32 %a) { + %tmp = xor i32 %a, 3422604288 + %tmp1 = icmp eq i32 0, %tmp + ret i1 %tmp1 +} +; CHECK-LABEL: f16: +; CHECK: eor r0, r0, #-872363008 +; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: lsrs r0, r0, #5 + ; 0xdddddddd = 3722304989 -define i1 @f8(i32 %a) { +define i1 @f17(i32 %a) { %tmp = xor i32 %a, 3722304989 - %tmp1 = icmp ne i32 0, %tmp + %tmp1 = icmp eq i32 %tmp, 0 ret i1 %tmp1 } +; CHECK-LABEL: f17: +; CHECK: eor r0, r0, #-572662307 +; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: lsrs r0, r0, #5 ; 0x00110000 = 1114112 -define i1 @f10(i32 %a) { +define i1 @f18(i32 %a) { %tmp = xor i32 %a, 1114112 %tmp1 = icmp eq i32 0, %tmp ret i1 %tmp1 } -; CHECK-LABEL: f10: -; CHECK: teq.w {{.*}}, #1114112 +; CHECK-LABEL: f18: +; CHECK: eor r0, r0, #1114112 +; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: lsrs r0, r0, #5 Index: test/CodeGen/Thumb2/thumb2-teq2.ll =================================================================== --- test/CodeGen/Thumb2/thumb2-teq2.ll +++ test/CodeGen/Thumb2/thumb2-teq2.ll @@ -3,50 +3,55 @@ ; These tests would be improved by 'movs r0, #0' being rematerialized below the ; tst as 'mov.w r0, #0'. -define i1 @f2(i32 %a, i32 %b) { +define i32 @f2(i32 %a, i32 %b) { ; CHECK: f2 ; CHECK: teq.w {{.*}}, r1 %tmp = xor i32 %a, %b %tmp1 = icmp eq i32 %tmp, 0 - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } -define i1 @f4(i32 %a, i32 %b) { +define i32 @f4(i32 %a, i32 %b) { ; CHECK: f4 ; CHECK: teq.w {{.*}}, r1 %tmp = xor i32 %a, %b %tmp1 = icmp eq i32 0, %tmp - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } -define i1 @f6(i32 %a, i32 %b) { +define i32 @f6(i32 %a, i32 %b) { ; CHECK: f6 ; CHECK: teq.w {{.*}}, r1, lsl #5 %tmp = shl i32 %b, 5 %tmp1 = xor i32 %a, %tmp %tmp2 = icmp eq i32 %tmp1, 0 - ret i1 %tmp2 + %ret = select i1 %tmp2, i32 42, i32 24 + ret i32 %ret } -define i1 @f7(i32 %a, i32 %b) { +define i32 @f7(i32 %a, i32 %b) { ; CHECK: f7 ; CHECK: teq.w {{.*}}, r1, lsr #6 %tmp = lshr i32 %b, 6 %tmp1 = xor i32 %a, %tmp %tmp2 = icmp eq i32 %tmp1, 0 - ret i1 %tmp2 + %ret = select i1 %tmp2, i32 42, i32 24 + ret i32 %ret } -define i1 @f8(i32 %a, i32 %b) { +define i32 @f8(i32 %a, i32 %b) { ; CHECK: f8 ; CHECK: teq.w {{.*}}, r1, asr #7 %tmp = ashr i32 %b, 7 %tmp1 = xor i32 %a, %tmp %tmp2 = icmp eq i32 %tmp1, 0 - ret i1 %tmp2 + %ret = select i1 %tmp2, i32 42, i32 24 + ret i32 %ret } -define i1 @f9(i32 %a, i32 %b) { +define i32 @f9(i32 %a, i32 %b) { ; CHECK: f9 ; CHECK: teq.w {{.*}}, {{.*}}, ror #8 %l8 = shl i32 %a, 24 @@ -54,5 +59,6 @@ %tmp = or i32 %l8, %r8 %tmp1 = xor i32 %a, %tmp %tmp2 = icmp eq i32 %tmp1, 0 - ret i1 %tmp2 + %ret = select i1 %tmp2, i32 42, i32 24 + ret i32 %ret } Index: test/CodeGen/Thumb2/thumb2-tst.ll =================================================================== --- test/CodeGen/Thumb2/thumb2-tst.ll +++ test/CodeGen/Thumb2/thumb2-tst.ll @@ -4,46 +4,107 @@ ; tst as 'mov.w r0, #0'. ; 0x000000bb = 187 -define i1 @f2(i32 %a) { +define i32 @f2(i32 %a) { %tmp = and i32 %a, 187 %tmp1 = icmp eq i32 0, %tmp - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } ; CHECK-LABEL: f2: ; CHECK: tst.w {{.*}}, #187 ; 0x00aa00aa = 11141290 -define i1 @f3(i32 %a) { +define i32 @f3(i32 %a) { %tmp = and i32 %a, 11141290 %tmp1 = icmp eq i32 %tmp, 0 - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } ; CHECK-LABEL: f3: ; CHECK: tst.w {{.*}}, #11141290 ; 0xcc00cc00 = 3422604288 -define i1 @f6(i32 %a) { +define i32 @f6(i32 %a) { %tmp = and i32 %a, 3422604288 %tmp1 = icmp eq i32 0, %tmp - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } ; CHECK-LABEL: f6: ; CHECK: tst.w {{.*}}, #-872363008 ; 0xdddddddd = 3722304989 -define i1 @f7(i32 %a) { +define i32 @f7(i32 %a) { %tmp = and i32 %a, 3722304989 %tmp1 = icmp eq i32 %tmp, 0 - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } ; CHECK-LABEL: f7: ; CHECK: tst.w {{.*}}, #-572662307 ; 0x00110000 = 1114112 -define i1 @f10(i32 %a) { +define i32 @f10(i32 %a) { %tmp = and i32 %a, 1114112 %tmp1 = icmp eq i32 0, %tmp - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } ; CHECK-LABEL: f10: ; CHECK: tst.w {{.*}}, #1114112 + +; 0x000000bb = 187 +define i1 @f12(i32 %a) { + %tmp = and i32 %a, 187 + %tmp1 = icmp eq i32 0, %tmp + ret i1 %tmp1 +} +; CHECK-LABEL: f12: +; CHECK: and r0, r0, #187 +; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: lsrs r0, r0, #5 + +; 0x00aa00aa = 11141290 +define i1 @f13(i32 %a) { + %tmp = and i32 %a, 11141290 + %tmp1 = icmp eq i32 %tmp, 0 + ret i1 %tmp1 +} +; CHECK-LABEL: f13: +; CHECK: and r0, r0, #11141290 +; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: lsrs r0, r0, #5 + +; 0xcc00cc00 = 3422604288 +define i1 @f16(i32 %a) { + %tmp = and i32 %a, 3422604288 + %tmp1 = icmp eq i32 0, %tmp + ret i1 %tmp1 +} +; CHECK-LABEL: f16: +; CHECK: and r0, r0, #-872363008 +; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: lsrs r0, r0, #5 + +; 0xdddddddd = 3722304989 +define i1 @f17(i32 %a) { + %tmp = and i32 %a, 3722304989 + %tmp1 = icmp eq i32 %tmp, 0 + ret i1 %tmp1 +} +; CHECK-LABEL: f17: +; CHECK: bic r0, r0, #572662306 +; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: lsrs r0, r0, #5 + +; 0x00110000 = 1114112 +define i1 @f18(i32 %a) { + %tmp = and i32 %a, 1114112 + %tmp1 = icmp eq i32 0, %tmp + ret i1 %tmp1 +} +; CHECK-LABEL: f18: +; CHECK: and r0, r0, #1114112 +; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: lsrs r0, r0, #5 + Index: test/CodeGen/Thumb2/thumb2-tst2.ll =================================================================== --- test/CodeGen/Thumb2/thumb2-tst2.ll +++ test/CodeGen/Thumb2/thumb2-tst2.ll @@ -3,50 +3,55 @@ ; These tests would be improved by 'movs r0, #0' being rematerialized below the ; tst as 'mov.w r0, #0'. -define i1 @f2(i32 %a, i32 %b) { +define i32 @f2(i32 %a, i32 %b) { ; CHECK-LABEL: f2: ; CHECK: tst {{.*}}, r1 %tmp = and i32 %a, %b %tmp1 = icmp eq i32 %tmp, 0 - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } -define i1 @f4(i32 %a, i32 %b) { +define i32 @f4(i32 %a, i32 %b) { ; CHECK-LABEL: f4: ; CHECK: tst {{.*}}, r1 %tmp = and i32 %a, %b %tmp1 = icmp eq i32 0, %tmp - ret i1 %tmp1 + %ret = select i1 %tmp1, i32 42, i32 24 + ret i32 %ret } -define i1 @f6(i32 %a, i32 %b) { +define i32 @f6(i32 %a, i32 %b) { ; CHECK-LABEL: f6: ; CHECK: tst.w {{.*}}, r1, lsl #5 %tmp = shl i32 %b, 5 %tmp1 = and i32 %a, %tmp %tmp2 = icmp eq i32 %tmp1, 0 - ret i1 %tmp2 + %ret = select i1 %tmp2, i32 42, i32 24 + ret i32 %ret } -define i1 @f7(i32 %a, i32 %b) { +define i32 @f7(i32 %a, i32 %b) { ; CHECK-LABEL: f7: ; CHECK: tst.w {{.*}}, r1, lsr #6 %tmp = lshr i32 %b, 6 %tmp1 = and i32 %a, %tmp %tmp2 = icmp eq i32 %tmp1, 0 - ret i1 %tmp2 + %ret = select i1 %tmp2, i32 42, i32 24 + ret i32 %ret } -define i1 @f8(i32 %a, i32 %b) { +define i32 @f8(i32 %a, i32 %b) { ; CHECK-LABEL: f8: ; CHECK: tst.w {{.*}}, r1, asr #7 %tmp = ashr i32 %b, 7 %tmp1 = and i32 %a, %tmp %tmp2 = icmp eq i32 %tmp1, 0 - ret i1 %tmp2 + %ret = select i1 %tmp2, i32 42, i32 24 + ret i32 %ret } -define i1 @f9(i32 %a, i32 %b) { +define i32 @f9(i32 %a, i32 %b) { ; CHECK-LABEL: f9: ; CHECK: tst.w {{.*}}, {{.*}}, ror #8 %l8 = shl i32 %a, 24 @@ -54,5 +59,6 @@ %tmp = or i32 %l8, %r8 %tmp1 = and i32 %a, %tmp %tmp2 = icmp eq i32 %tmp1, 0 - ret i1 %tmp2 + %ret = select i1 %tmp2, i32 42, i32 24 + ret i32 %ret }