Index: llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -97,6 +97,8 @@ return SelectImmShifterOperand(N, A, B, false); } + bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out); + bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); @@ -569,6 +571,14 @@ return true; } +// Determine whether an ISD::OR's operands are suitable to turn the operation +// into an addition, which often has more compact encodings. +bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) { + assert(Parent->getOpcode() == ISD::OR && "unexpected parent"); + Out = N; + return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1)); +} + bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, SDValue &Base, Index: llvm/lib/Target/ARM/ARMInstrThumb.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrThumb.td +++ llvm/lib/Target/ARM/ARMInstrThumb.td @@ -270,6 +270,14 @@ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } +// Inspects parent to determine whether an or instruction can be implemented as +// an add (i.e. whether we know overflow won't occur in the add). +def AddLikeOrOp : ComplexPattern<i32, 1, "SelectAddLikeOr", [], + [SDNPWantParent]>; + +// Pattern to exclude immediates from matching +def non_imm32 : PatLeaf<(i32 GPR), [{ return !isa<ConstantSDNode>(N); }]>; + //===----------------------------------------------------------------------===// // Miscellaneous Instructions. // @@ -997,6 +1005,15 @@ } } +// Thumb has more flexible short encodings for ADD than ORR, so use those where +// possible. 
+def : T1Pat<(or AddLikeOrOp:$Rn, imm0_7:$imm), (tADDi3 $Rn, imm0_7:$imm)>; + +def : T1Pat<(or AddLikeOrOp:$Rn, imm8_255:$imm), (tADDi8 $Rn, imm8_255:$imm)>; + +def : T1Pat<(or AddLikeOrOp:$Rn, tGPR:$Rm), (tADDrr $Rn, $Rm)>; + + def : tInstAlias <"add${s}${p} $Rdn, $Rm", (tADDrr tGPR:$Rdn,s_cc_out:$s, tGPR:$Rdn, tGPR:$Rm, pred:$p)>; Index: llvm/lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrThumb2.td +++ llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2594,6 +2594,18 @@ def : T2Pat<(t2_so_imm_not:$src), (t2MVNi t2_so_imm_not:$src)>; +// There are shorter Thumb encodings for ADD than ORR, so to increase +// Thumb2SizeReduction's chances later on we select a t2ADD for an or where +// possible. +def : T2Pat<(or AddLikeOrOp:$Rn, t2_so_imm:$imm), + (t2ADDri $Rn, t2_so_imm:$imm)>; + +def : T2Pat<(or AddLikeOrOp:$Rn, imm0_4095:$Rm), + (t2ADDri12 $Rn, imm0_4095:$Rm)>; + +def : T2Pat<(or AddLikeOrOp:$Rn, non_imm32:$Rm), + (t2ADDrr $Rn, $Rm)>; + //===----------------------------------------------------------------------===// // Multiply Instructions. // Index: llvm/test/CodeGen/ARM/add-like-or.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/add-like-or.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple=thumbv6m-apple-macho %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T1 +; RUN: llc -mtriple=thumbv7m-apple-macho %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T2 + +define i32 @test_add_i3(i1 %tst, i32 %a, i32 %b) { +; CHECK-LABEL: test_add_i3: +; CHECK: adds r0, {{r[0-9]+}}, #2 + %tmp = and i32 %a, -7 + %tmp1 = and i32 %b, -4 + %int = select i1 %tst, i32 %tmp, i32 %tmp1 + + ; Call to force %int into a register that isn't r0 so using the i3 form is a + ; good idea. 
+ call void @foo(i32 %int) + %res = or i32 %int, 2 + ret i32 %res +} + +define i32 @test_add_i8(i32 %a, i32 %b, i1 %tst) { +; CHECK-LABEL: test_add_i8: +; CHECK-T1: adds r0, #12 +; CHECK-T2: add.w r0, {{r[0-9]+}}, #12 + + %tmp = and i32 %a, -256 + %tmp1 = and i32 %b, -512 + %int = select i1 %tst, i32 %tmp, i32 %tmp1 + %res = or i32 %int, 12 + ret i32 %res +} + +define i32 @test_add_i12(i32 %a, i32 %b, i1 %tst) { +; CHECK-LABEL: test_add_i12: +; CHECK-T2: addw r0, {{r[0-9]+}}, #854 + + %tmp = and i32 %a, -4096 + %tmp1 = and i32 %b, -8192 + %int = select i1 %tst, i32 %tmp, i32 %tmp1 + %res = or i32 %int, 854 + ret i32 %res +} + +declare void @foo(i32) Index: llvm/test/CodeGen/ARM/shift-combine.ll =================================================================== --- llvm/test/CodeGen/ARM/shift-combine.ll +++ llvm/test/CodeGen/ARM/shift-combine.ll @@ -130,7 +130,7 @@ ; CHECK-V6M: ldrh [[LOW:r[0-9]+]], [r0, #2] ; CHECK-V6M: ldr [[HIGH:r[0-9]+]], [r0, #4] ; CHECK-V6M-NEXT: lsls [[HIGH]], [[HIGH]], #16 -; CHECK-V6M-NEXT: orrs r0, r1 +; CHECK-V6M-NEXT: adds r0, r1, r0 ; CHECK-ALIGN: ldr [[HIGH:r[0-9]+]], [r0, #4] ; CHECK-ALIGN-NEXT: ldrh [[LOW:r[0-9]+]], [r0, #2] ; CHECK-ALIGN-NEXT: orr.w r0, [[LOW]], [[HIGH]], lsl #16 Index: llvm/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll =================================================================== --- llvm/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll +++ llvm/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll @@ -15,7 +15,7 @@ ; Make sure the cmp is not scheduled before the InlineAsm that clobbers cc. 
; CHECK: bl _f2 -; CHECK: cmp r0, #0 +; CHECK: cmp {{r[0-9]+}}, #0 ; CHECK-NOT: cmp ; CHECK: InlineAsm Start define void @test(%s1* %this, i32 %format, i32 %w, i32 %h, i32 %levels, i32* %s, i8* %data, i32* nocapture %rowbytes, void (i8*, i8*)* %release, i8* %info) nounwind { Index: llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll =================================================================== --- llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll +++ llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll @@ -109,13 +109,13 @@ ; CHECK-DSP: and.w r0, r1, r0, lsr #7 ; CHECK-DSP: lsrs r1, r0, #5 ; CHECK-DSP: uxtb16 r1, r1 -; CHECk-DSP: orrs r0, r1 +; CHECk-DSP: adds r0, r1 ; CHECK-NO-DSP: mov.w r1, #16253176 ; CHECK-NO-DSP: and.w r0, r1, r0, lsr #7 ; CHECK-NO-DSP: mov.w r1, #458759 ; CHECK-NO-DSP: and.w r1, r1, r0, lsr #5 -; CHECK-NO-DSP: orrs r0, r1 +; CHECK-NO-DSP: add r0, r1 %tmp1 = lshr i32 %p0, 7 ; [#uses=1] %tmp2 = and i32 %tmp1, 16253176 ; [#uses=2] %tmp4 = lshr i32 %tmp2, 5 ; [#uses=1]