Index: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2544,7 +2544,11 @@ case ARM::EORrr: case ARM::EORri: case ARM::t2EORrr: - case ARM::t2EORri: { + case ARM::t2EORri: + case ARM::t2LSRri: + case ARM::t2LSRrr: + case ARM::t2LSLri: + case ARM::t2LSLrr: { // Scan forward for the use of CPSR // When checking against MI: if it's a conditional code that requires // checking of the V bit or C bit, then this is not safe to do. Index: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -245,6 +245,7 @@ bool tryInlineAsm(SDNode *N); void SelectConcatVector(SDNode *N); + void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI); bool trySMLAWSMULW(SDNode *N); @@ -2730,6 +2731,87 @@ ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1))); } +static Optional<std::pair<unsigned, unsigned>> +getContiguousRangeOfSetBits(const APInt &A) { + unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1; + unsigned LastOne = A.countTrailingZeros(); + if (A.countPopulation() != (FirstOne - LastOne + 1)) + return Optional<std::pair<unsigned,unsigned>>(); + return std::make_pair(FirstOne, LastOne); +} + +void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) { + assert(N->getOpcode() == ARMISD::CMPZ); + SwitchEQNEToPLMI = false; + + if (!Subtarget->isThumb()) + // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and + // LSR don't exist as standalone instructions - they need the barrel shifter.
+ return; + + // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X)) + SDValue And = N->getOperand(0); + if (!And->hasOneUse()) + return; + + SDValue Zero = N->getOperand(1); + if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() || + And->getOpcode() != ISD::AND) + return; + SDValue X = And.getOperand(0); + auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); + + if (!C || !X->hasOneUse()) + return; + auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); + if (!Range) + return; + + // There are several ways to lower this: + SDNode *NewN; + SDLoc dl(N); + + auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* { + if (Subtarget->isThumb2()) { + Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri; + SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32), + getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); + } else { + SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src, + CurDAG->getTargetConstant(Imm, dl, MVT::i32), + getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; + return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); + } + }; + + if (Range->second == 0) { + // 1. Mask includes the LSB -> Simply shift the top N bits off + NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); + ReplaceNode(And.getNode(), NewN); + } else if (Range->first == 31) { + // 2. Mask includes the MSB -> Simply shift the bottom N bits off + NewN = EmitShift(ARM::tLSRri, X, Range->second); + ReplaceNode(And.getNode(), NewN); + } else if (Range->first == Range->second) { + // 3. Only one bit is set. We can shift this into the sign bit and use a + // PL/MI comparison. + NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); + ReplaceNode(And.getNode(), NewN); + + SwitchEQNEToPLMI = true; + } else if (!Subtarget->hasV6T2Ops()) { + // 4.
Do a double shift to clear bottom and top bits, but only in + // thumb-1 mode as in thumb-2 we can use UBFX. + NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); + NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0), + Range->second + (31 - Range->first)); + ReplaceNode(And.getNode(), NewN); + } + +} + void ARMDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); @@ -2957,6 +3039,7 @@ return; } } + break; } case ARMISD::VMOVRRD: @@ -3148,9 +3231,27 @@ assert(N2.getOpcode() == ISD::Constant); assert(N3.getOpcode() == ISD::Register); - SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) - cast<ConstantSDNode>(N2)->getZExtValue()), dl, - MVT::i32); + unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); + + if (InFlag.getOpcode() == ARMISD::CMPZ) { + bool SwitchEQNEToPLMI; + SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); + InFlag = N->getOperand(4); + + if (SwitchEQNEToPLMI) { + switch ((ARMCC::CondCodes)CC) { + default: llvm_unreachable("CMPZ must be either NE or EQ!"); + case ARMCC::NE: + CC = (unsigned)ARMCC::MI; + break; + case ARMCC::EQ: + CC = (unsigned)ARMCC::PL; + break; + } + } + } + + SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, MVT::Glue, Ops); @@ -3205,6 +3306,38 @@ // Other cases are autogenerated.
break; } + + case ARMISD::CMOV: { + SDValue InFlag = N->getOperand(4); + + if (InFlag.getOpcode() == ARMISD::CMPZ) { + bool SwitchEQNEToPLMI; + SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); + + if (SwitchEQNEToPLMI) { + SDValue ARMcc = N->getOperand(2); + ARMCC::CondCodes CC = + (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); + + switch (CC) { + default: llvm_unreachable("CMPZ must be either NE or EQ!"); + case ARMCC::NE: + CC = ARMCC::MI; + break; + case ARMCC::EQ: + CC = ARMCC::PL; + break; + } + SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); + SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, + N->getOperand(3), N->getOperand(4)}; + CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); + } + + } + // Other cases are autogenerated. + break; + } case ARMISD::VZIP: { unsigned Opc = 0; Index: llvm/trunk/test/CodeGen/ARM/and-cmpz.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/and-cmpz.ll +++ llvm/trunk/test/CodeGen/ARM/and-cmpz.ll @@ -0,0 +1,71 @@ +; RUN: llc -mtriple=thumbv7m-linux-gnu < %s | FileCheck %s --check-prefix=CHECK --check-prefix=T2 +; RUN: llc -mtriple=thumbv6m-linux-gnu < %s | FileCheck %s --check-prefix=CHECK --check-prefix=T1 + +; CHECK-LABEL: single_bit: +; CHECK: lsls r0, r0, #23 +; T2-NEXT: mov +; T2-NEXT: it +; T1-NEXT: bmi +define i32 @single_bit(i32 %p) { + %a = and i32 %p, 256 + %b = icmp eq i32 %a, 0 + br i1 %b, label %true, label %false + +true: + ret i32 1 + +false: + ret i32 2 +} + +; CHECK-LABEL: multi_bit_lsb_ubfx: +; CHECK: lsls r0, r0, #24 +; T2-NEXT: mov +; T2-NEXT: it +; T1-NEXT: beq +define i32 @multi_bit_lsb_ubfx(i32 %p) { + %a = and i32 %p, 255 + %b = icmp eq i32 %a, 0 + br i1 %b, label %true, label %false + +true: + ret i32 1 + +false: + ret i32 2 +} + +; CHECK-LABEL: multi_bit_msb: +; CHECK: lsrs r0, r0, #24 +; T2-NEXT: mov +; T2-NEXT: it +; T1-NEXT: beq +define i32 @multi_bit_msb(i32 %p) { + %a = and i32 %p, 4278190080 ; 0xff000000 +
%b = icmp eq i32 %a, 0 + br i1 %b, label %true, label %false + +true: + ret i32 1 + +false: + ret i32 2 +} + +; CHECK-LABEL: multi_bit_nosb: +; T1: lsls r0, r0, #8 +; T1-NEXT: lsrs r0, r0, #24 +; T2: tst.w +; T2-NEXT: it +; T1-NEXT: beq +define i32 @multi_bit_nosb(i32 %p) { + %a = and i32 %p, 16711680 ; 0x00ff0000 + %b = icmp eq i32 %a, 0 + br i1 %b, label %true, label %false + +true: + ret i32 1 + +false: + ret i32 2 +} Index: llvm/trunk/test/CodeGen/ARM/arm-and-tst-peephole.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ llvm/trunk/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -93,7 +93,7 @@ %1 = load i8, i8* %0, align 1 %2 = zext i8 %1 to i32 ; ARM: ands -; THUMB: ands +; THUMB: ands ; T2: ands ; V8: ands ; V8-NEXT: beq @@ -150,10 +150,9 @@ %rhs32 = zext i1 %rhs to i32 %diff = sub nsw i32 %lhs32, %rhs32 ; ARM: tst r1, #1 -; THUMB: movs [[RTMP:r[0-9]+]], #1 -; THUMB: tst r1, [[RTMP]] -; T2: tst.w r1, #1 -; V8: tst.w r1, #1 +; THUMB: lsls r1, r1, #31 +; T2: lsls r1, r1, #31 +; V8: lsls r1, r1, #31 ret i32 %diff } Index: llvm/trunk/test/CodeGen/ARM/arm-shrink-wrapping.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/arm-shrink-wrapping.ll +++ llvm/trunk/test/CodeGen/ARM/arm-shrink-wrapping.ll @@ -638,14 +638,13 @@ ; during PEI with shrink-wrapping enable. 
; CHECK-LABEL: debug_info: ; -; ENABLE: tst{{(\.w)?}} r2, #1 +; ENABLE: {{tst r2, #1|lsls r1, r2, #31}} ; ENABLE-NEXT: beq [[BB13:LBB[0-9_]+]] ; ; CHECK: push ; -; DISABLE: tst{{(\.w)?}} r2, #1 -; DISABLE-NEXT: vst1.64 -; DISABLE-NEXT: beq [[BB13:LBB[0-9_]+]] +; DISABLE: {{tst r2, #1|lsls r1, r2, #31}} +; DISABLE: beq [[BB13:LBB[0-9_]+]] ; ; CHECK: bl{{x?}} _pow ; Index: llvm/trunk/test/CodeGen/ARM/call-tc.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/call-tc.ll +++ llvm/trunk/test/CodeGen/ARM/call-tc.ll @@ -120,7 +120,7 @@ br i1 %tobool2, label %if.end5, label %if.then3 if.then3: ; preds = %if.end -; CHECKT2D: bne.w _b +; CHECKT2D: bmi.w _b %call4 = tail call i32 @b(i32 %x) nounwind br label %return Index: llvm/trunk/test/CodeGen/ARM/debug-info-branch-folding.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/debug-info-branch-folding.ll +++ llvm/trunk/test/CodeGen/ARM/debug-info-branch-folding.ll @@ -3,8 +3,7 @@ target triple = "thumbv7-apple-macosx10.6.7" ;CHECK: vadd.f32 q4, q8, q8 -;CHECK: LBB0_1: -;CHECK-NOT: beq LBB0_1 +;CHECK-NEXT: LBB0_1 ;CHECK: @DEBUG_VALUE: x <- %Q4{{$}} ;CHECK-NEXT: @DEBUG_VALUE: y <- %Q4{{$}} Index: llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll +++ llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll @@ -650,11 +650,14 @@ ; CHECK: tst r3, r4 ; ENABLE-NEXT: pop {r4} -; ENABLE-NEXT: pop {r3} -; ENABLE-NEXT: mov lr, r3 +; ENABLE-NEXT: mov r12, r{{.*}} +; ENABLE-NEXT: pop {r0} +; ENABLE-NEXT: mov lr, r0 +; ENABLE-NEXT: mov r0, r12 ; CHECK-NEXT: beq [[EXIT_LABEL]] ; CHECK: str r1, [r2] +; CHECK: str r3, [r2] ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: [[EXIT_LABEL]]: @ %cleanup ; ENABLE-NEXT: bx lr @@ -675,6 +678,7 @@ if.end4: store i32 %head, i32* %y, align 4 + store volatile i32 %z, i32* %y, align 4 
br label %cleanup cleanup: Index: llvm/trunk/test/CodeGen/Thumb2/float-ops.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/float-ops.ll +++ llvm/trunk/test/CodeGen/Thumb2/float-ops.ll @@ -259,9 +259,9 @@ define float @select_f(float %a, float %b, i1 %c) { ; CHECK-LABEL: select_f: -; NONE: tst.w r2, #1 +; NONE: lsls r2, r2, #31 ; NONE: moveq r0, r1 -; HARD: tst.w r0, #1 +; HARD: lsls r0, r0, #31 ; VFP4-ALL: vmovne.f32 s1, s0 ; VFP4-ALL: vmov.f32 s0, s1 ; FP-ARMv8: vseleq.f32 s0, s1, s0 @@ -271,8 +271,8 @@ define double @select_d(double %a, double %b, i1 %c) { ; CHECK-LABEL: select_d: -; NONE: ldr.w [[REG:r[0-9]+]], [sp] -; NONE: ands [[REG]], [[REG]], #1 +; NONE: ldr{{(.w)?}} [[REG:r[0-9]+]], [sp] +; NONE ands [[REG]], [[REG]], #1 ; NONE: moveq r0, r2 ; NONE: moveq r1, r3 ; SP: ands r0, r0, #1 @@ -282,7 +282,7 @@ ; SP-DAG: movne [[BLO]], [[ALO]] ; SP-DAG: movne [[BHI]], [[AHI]] ; SP: vmov d0, [[BLO]], [[BHI]] -; DP: tst.w r0, #1 +; DP: lsls r0, r0, #31 ; VFP4-DP: vmovne.f64 d1, d0 ; VFP4-DP: vmov.f64 d0, d1 ; FP-ARMV8: vseleq.f64 d0, d1, d0