Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -3289,7 +3289,7 @@ } unsigned UseOpc = UseMI.getOpcode(); - unsigned NewUseOpc = 0; + unsigned NewUseOpc1 = 0, NewUseOpc2 = 0; uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm(); uint32_t SOImmValV1 = 0, SOImmValV2 = 0; bool Commute = false; @@ -3307,22 +3307,34 @@ switch (UseOpc) { default: break; case ARM::ADDrr: - case ARM::SUBrr: + case ARM::SUBrr: { if (UseOpc == ARM::SUBrr && Commute) return false; + bool IsSub = false; // ADD/SUB are special because they're essentially the same operation, so // we can handle a larger range of immediates. - if (ARM_AM::isSOImmTwoPartVal(ImmVal)) - NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri; - else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) { + if (ARM_AM::isSOImmTwoPartVal(ImmVal)) { + NewUseOpc1 = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri; + NewUseOpc2 = NewUseOpc1; + } else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) { ImmVal = -ImmVal; - NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri; + NewUseOpc1 = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri; + NewUseOpc2 = NewUseOpc1; + } else if (ARM_AM::isSOImmTwoPartValSub(ImmVal, &SOImmValV2, + &SOImmValV1)) { + IsSub = true; + NewUseOpc1 = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri; + NewUseOpc2 = UseOpc == ARM::ADDrr ? 
ARM::SUBri : ARM::ADDri; } else return false; - SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); - SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); + + if (!IsSub) { + SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); + SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); + } break; + } case ARM::ORRrr: case ARM::EORrr: if (!ARM_AM::isSOImmTwoPartVal(ImmVal)) @@ -3331,8 +3343,12 @@ SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); switch (UseOpc) { default: break; - case ARM::ORRrr: NewUseOpc = ARM::ORRri; break; - case ARM::EORrr: NewUseOpc = ARM::EORri; break; + case ARM::ORRrr: + NewUseOpc1 = NewUseOpc2 = ARM::ORRri; + break; + case ARM::EORrr: + NewUseOpc1 = NewUseOpc2 = ARM::EORri; + break; } break; case ARM::t2ADDrr: @@ -3345,11 +3361,13 @@ const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP; const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri; const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri; - if (ARM_AM::isT2SOImmTwoPartVal(ImmVal)) - NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB; - else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) { + if (ARM_AM::isT2SOImmTwoPartVal(ImmVal)) { + NewUseOpc1 = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB; + NewUseOpc2 = NewUseOpc1; + } else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) { ImmVal = -ImmVal; - NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD; + NewUseOpc1 = UseOpc == ARM::t2ADDrr ? 
t2SUB : t2ADD; + NewUseOpc2 = NewUseOpc1; } else return false; SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); @@ -3364,8 +3382,12 @@ SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); switch (UseOpc) { default: break; - case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break; - case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break; + case ARM::t2ORRrr: + NewUseOpc1 = NewUseOpc2 = ARM::t2ORRri; + break; + case ARM::t2EORrr: + NewUseOpc1 = NewUseOpc2 = ARM::t2EORri; + break; } break; } @@ -3377,13 +3399,13 @@ bool isKill = UseMI.getOperand(OpIdx).isKill(); const TargetRegisterClass *TRC = MRI->getRegClass(Reg); Register NewReg = MRI->createVirtualRegister(TRC); - BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc), + BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc1), NewReg) .addReg(Reg1, getKillRegState(isKill)) .addImm(SOImmValV1) .add(predOps(ARMCC::AL)) .add(condCodeOp()); - UseMI.setDesc(get(NewUseOpc)); + UseMI.setDesc(get(NewUseOpc2)); UseMI.getOperand(1).setReg(NewReg); UseMI.getOperand(1).setIsKill(); UseMI.getOperand(2).ChangeToImmediate(SOImmValV2); @@ -3393,11 +3415,12 @@ // Then the below code will not be needed, as the input/output register // classes will be rgpr or gprSP. // For now, we fix the UseMI operand explicitly here: - switch(NewUseOpc){ + switch(NewUseOpc1) { case ARM::t2ADDspImm: case ARM::t2SUBspImm: case ARM::t2ADDri: case ARM::t2SUBri: + assert(NewUseOpc1 == NewUseOpc2 && "opc1 and opc2 are not identical"); MRI->setRegClass(UseMI.getOperand(0).getReg(), TRC); } return true; @@ -5522,6 +5545,8 @@ return ForCodesize ? 8 : 2; if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs return ForCodesize ? 8 : 2; + if (ARM_AM::isSOImmTwoPartValSub(Val)) // two instrs + return ForCodesize ? 8 : 2; } if (Subtarget->useMovt()) // MOVW + MOVT return ForCodesize ? 
8 : 2; Index: llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp =================================================================== --- llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -884,7 +884,7 @@ .addReg(DstReg); SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); - } else { // Expand into a mvn + sub. + } else if (ARM_AM::isSOImmTwoPartValNeg(ImmVal)) { // Expand to mvn + sub. LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), DstReg); HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) @@ -892,7 +892,14 @@ SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(-ImmVal); SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(-ImmVal); SOImmValV1 = ~(-SOImmValV1); - } + } else if (ARM_AM::isSOImmTwoPartValSub(ImmVal, &SOImmValV2, &SOImmValV1)) { + // Expand into a movi + subri. + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg); + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg); + } else + llvm_unreachable("unknown immediate type"); unsigned MIFlags = MI.getFlags(); LO16 = LO16.addImm(SOImmValV1); Index: llvm/lib/Target/ARM/ARMInstrInfo.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrInfo.td +++ llvm/lib/Target/ARM/ARMInstrInfo.td @@ -833,7 +833,9 @@ return true; if (ARM_AM::isSOImmTwoPartVal(Imm.getZExtValue())) return true; - return ARM_AM::isSOImmTwoPartValNeg(Imm.getZExtValue()); + if (ARM_AM::isSOImmTwoPartValNeg(Imm.getZExtValue())) + return true; + return ARM_AM::isSOImmTwoPartValSub(Imm.getZExtValue()); }]>; /// imm0_1 predicate - Immediate in the range [0,1]. 
Index: llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
===================================================================
--- llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -219,6 +219,39 @@
     return !(rotr32(~255U, getSOImmValRotate(First)) & First);
   }
 
+  /// isSOImmTwoPartValSub - Return true if the specified value V can be
+  /// obtained by two SOImmVal's subtraction: V = Y - X. Returns false when V
+  /// is itself a single SOImmVal. On success X and Y are stored through PX and
+  /// PY when those pointers are non-null; on failure nothing is written.
+  inline bool isSOImmTwoPartValSub(unsigned V, unsigned *PX = nullptr,
+                                   unsigned *PY = nullptr) {
+    // If this can be handled with a single shifter_op, bail out.
+    if ((rotr32(~255U, getSOImmValRotate(V)) & V) == 0)
+      return false;
+    // Check if V can be obtained by Y-X, both Y and X are SOImmVal.
+    // Suppose V in the form of
+    // {leading 00, upper effective bits, lower 8 effective bits, trailing 00}
+    // The first step is calculating the top bit position of the lower 8
+    // effective bits. Adding 7 for an odd trailing-zero count and 8 for an
+    // even one always leaves Bits even.
+    unsigned Bits = countTrailingZeros(V);
+    Bits += (Bits & 1) ? 7 : 8;
+    // The second step is calculating X, the distance from the low Bits bits
+    // of V up to 2^Bits. Shift an unsigned 1 so no signed shift is involved.
+    const unsigned Pow2 = 1U << Bits;
+    const unsigned X = Pow2 - (V & (Pow2 - 1));
+    // The third step is calculating Y; the unsigned sum may wrap around.
+    const unsigned Y = V + X;
+    // Check if Y is SOImmVal.
+    if ((rotr32(~255U, getSOImmValRotate(Y)) & Y) != 0)
+      return false;
+    if (PX)
+      *PX = X;
+    if (PY)
+      *PY = Y;
+    return true;
+  }
+
   /// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed
   /// by a left shift. Returns the shift amount to use.
inline unsigned getThumbImmValShift(unsigned Imm) { Index: llvm/test/CodeGen/ARM/add-sub-imm.ll =================================================================== --- llvm/test/CodeGen/ARM/add-sub-imm.ll +++ llvm/test/CodeGen/ARM/add-sub-imm.ll @@ -1,70 +1,178 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s --check-prefix=CHECK-ARM +; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \ +; RUN: | FileCheck %s --check-prefix=CHECK-THUMB2 ;; Check how immediates are handled in add/sub. define i32 @sub0(i32 %0) { -; CHECK-LABEL: sub0: -; CHECK: @ %bb.0: -; CHECK-NEXT: sub r0, r0, #23 -; CHECK-NEXT: mov pc, lr +; CHECK-ARM-LABEL: sub0: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM: sub r0, r0, #23 +; CHECK-ARM: mov pc, lr +; +; CHECK-THUMB2-LABEL: sub0: +; CHECK-THUMB2: @ %bb.0: +; CHECK-THUMB2: subs r0, #23 +; CHECK-THUMB2: bx lr %2 = sub i32 %0, 23 ret i32 %2 } define i32 @sub1(i32 %0) { -; CHECK-LABEL: sub1: -; CHECK: @ %bb.0: -; CHECK-NEXT: ldr r1, .LCPI1_0 -; CHECK-NEXT: add r0, r0, r1 -; CHECK-NEXT: mov pc, lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI1_0: -; CHECK-NEXT: .long 4294836225 @ 0xfffe0001 +; CHECK-ARM-LABEL: sub1: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM: ldr r1, .LCPI1_0 +; CHECK-ARM: add r0, r0, r1 +; CHECK-ARM: mov pc, lr +; CHECK-ARM: .p2align 2 +; CHECK-ARM: @ %bb.1: +; CHECK-ARM: .LCPI1_0: +; CHECK-ARM: .long 4294836225 @ 0xfffe0001 +; +; CHECK-THUMB2-LABEL: sub1: +; CHECK-THUMB2: @ %bb.0: +; CHECK-THUMB2: movs r1, #1 +; CHECK-THUMB2: movt r1, #65534 +; CHECK-THUMB2: add r0, r1 +; CHECK-THUMB2: bx lr %2 = sub i32 %0, 131071 ret i32 %2 } define i32 @sub2(i32 %0) { -; CHECK-LABEL: sub2: -; CHECK: @ %bb.0: -; CHECK-NEXT: sub r0, r0, #35 -; CHECK-NEXT: sub r0, r0, #8960 -; CHECK-NEXT: mov pc, lr +; CHECK-ARM-LABEL: sub2: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM: sub
r0, r0, #35 +; CHECK-ARM: sub r0, r0, #8960 +; CHECK-ARM: mov pc, lr +; +; CHECK-THUMB2-LABEL: sub2: +; CHECK-THUMB2: @ %bb.0: +; CHECK-THUMB2: movw r1, #8995 +; CHECK-THUMB2: subs r0, r0, r1 +; CHECK-THUMB2: bx lr %2 = sub i32 %0, 8995 ret i32 %2 } define i32 @add0(i32 %0) { -; CHECK-LABEL: add0: -; CHECK: @ %bb.0: -; CHECK-NEXT: add r0, r0, #23 -; CHECK-NEXT: mov pc, lr +; CHECK-ARM-LABEL: add0: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM: add r0, r0, #23 +; CHECK-ARM: mov pc, lr +; +; CHECK-THUMB2-LABEL: add0: +; CHECK-THUMB2: @ %bb.0: +; CHECK-THUMB2: adds r0, #23 +; CHECK-THUMB2: bx lr %2 = add i32 %0, 23 ret i32 %2 } define i32 @add1(i32 %0) { -; CHECK-LABEL: add1: -; CHECK: @ %bb.0: -; CHECK-NEXT: ldr r1, .LCPI4_0 -; CHECK-NEXT: add r0, r0, r1 -; CHECK-NEXT: mov pc, lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI4_0: -; CHECK-NEXT: .long 131071 @ 0x1ffff +; CHECK-ARM-LABEL: add1: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM: add r0, r0, #131072 +; CHECK-ARM: sub r0, r0, #1 +; CHECK-ARM: mov pc, lr +; +; CHECK-THUMB2-LABEL: add1: +; CHECK-THUMB2: @ %bb.0: +; CHECK-THUMB2: movw r1, #65535 +; CHECK-THUMB2: movt r1, #1 +; CHECK-THUMB2: add r0, r1 +; CHECK-THUMB2: bx lr %2 = add i32 %0, 131071 ret i32 %2 } define i32 @add2(i32 %0) { -; CHECK-LABEL: add2: -; CHECK: @ %bb.0: -; CHECK-NEXT: add r0, r0, #8960 -; CHECK-NEXT: add r0, r0, #2293760 -; CHECK-NEXT: mov pc, lr +; CHECK-ARM-LABEL: add2: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM: add r0, r0, #8960 +; CHECK-ARM: add r0, r0, #2293760 +; CHECK-ARM: mov pc, lr +; +; CHECK-THUMB2-LABEL: add2: +; CHECK-THUMB2: @ %bb.0: +; CHECK-THUMB2: add.w r0, r0, #2293760 +; CHECK-THUMB2: add.w r0, r0, #8960 +; CHECK-THUMB2: bx lr %2 = add i32 %0, 2302720 ret i32 %2 } + +define i32 @add3(i32 %0) { +; CHECK-ARM-LABEL: add3: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM: ldr r1, .LCPI6_0 +; CHECK-ARM: add r0, r0, r1 +; CHECK-ARM: mov pc, lr +; CHECK-ARM: .p2align 2 +; CHECK-ARM: @ %bb.1: +; CHECK-ARM: .LCPI6_0: +; CHECK-ARM: .long 
2096725 @ 0x1ffe55 +; +; CHECK-THUMB2-LABEL: add3: +; CHECK-THUMB2: @ %bb.0: +; CHECK-THUMB2: movw r1, #65109 +; CHECK-THUMB2: movt r1, #31 +; CHECK-THUMB2: add r0, r1 +; CHECK-THUMB2: bx lr + %2 = add i32 %0, 2096725 + ret i32 %2 +} + +define i32 @add4(i32 %0) { +; CHECK-ARM-LABEL: add4: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM: ldr r1, .LCPI7_0 +; CHECK-ARM: add r0, r0, r1 +; CHECK-ARM: mov pc, lr +; CHECK-ARM: .p2align 2 +; CHECK-ARM: @ %bb.1: +; CHECK-ARM: .LCPI7_0: +; CHECK-ARM: .long 8462149 @ 0x811f45 +; +; CHECK-THUMB2-LABEL: add4: +; CHECK-THUMB2: @ %bb.0: +; CHECK-THUMB2: movw r1, #8005 +; CHECK-THUMB2: movt r1, #129 +; CHECK-THUMB2: add r0, r1 +; CHECK-THUMB2: bx lr + %2 = add i32 %0, 8462149 + ret i32 %2 +} + +define i32 @orr0(i32 %0) { +; CHECK-ARM-LABEL: orr0: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM: orr r0, r0, #8960 +; CHECK-ARM: orr r0, r0, #2293760 +; CHECK-ARM: mov pc, lr +; +; CHECK-THUMB2-LABEL: orr0: +; CHECK-THUMB2: @ %bb.0: +; CHECK-THUMB2: orr r0, r0, #2293760 +; CHECK-THUMB2: orr r0, r0, #8960 +; CHECK-THUMB2: bx lr + %2 = or i32 %0, 2302720 + ret i32 %2 +} + +define i32 @eor0(i32 %0) { +; CHECK-ARM-LABEL: eor0: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM: eor r0, r0, #8960 +; CHECK-ARM: eor r0, r0, #2293760 +; CHECK-ARM: mov pc, lr +; +; CHECK-THUMB2-LABEL: eor0: +; CHECK-THUMB2: @ %bb.0: +; CHECK-THUMB2: eor r0, r0, #2293760 +; CHECK-THUMB2: eor r0, r0, #8960 +; CHECK-THUMB2: bx lr + %2 = xor i32 %0, 2302720 + ret i32 %2 +} Index: llvm/test/CodeGen/ARM/sadd_sat.ll =================================================================== --- llvm/test/CodeGen/ARM/sadd_sat.ll +++ llvm/test/CodeGen/ARM/sadd_sat.ll @@ -276,14 +276,11 @@ ; CHECK-ARMNODPS-NEXT: orr r1, r1, #32512 ; CHECK-ARMNODPS-NEXT: cmp r0, r1 ; CHECK-ARMNODPS-NEXT: movlt r1, r0 -; CHECK-ARMNODPS-NEXT: ldr r0, .LCPI2_0 +; CHECK-ARMNODPS-NEXT: mov r0, #0 +; CHECK-ARMNODPS-NEXT: sub r0, r0, #32768 ; CHECK-ARMNODPS-NEXT: cmn r1, #32768 ; CHECK-ARMNODPS-NEXT: movgt r0, r1 ; CHECK-ARMNODPS-NEXT: bx lr 
-; CHECK-ARMNODPS-NEXT: .p2align 2 -; CHECK-ARMNODPS-NEXT: @ %bb.1: -; CHECK-ARMNODPS-NEXT: .LCPI2_0: -; CHECK-ARMNODPS-NEXT: .long 4294934528 @ 0xffff8000 ; ; CHECK-ARMBASEDSP-LABEL: func16: ; CHECK-ARMBASEDSP: @ %bb.0: Index: llvm/test/CodeGen/ARM/ssub_sat.ll =================================================================== --- llvm/test/CodeGen/ARM/ssub_sat.ll +++ llvm/test/CodeGen/ARM/ssub_sat.ll @@ -277,14 +277,11 @@ ; CHECK-ARMNODPS-NEXT: orr r1, r1, #32512 ; CHECK-ARMNODPS-NEXT: cmp r0, r1 ; CHECK-ARMNODPS-NEXT: movlt r1, r0 -; CHECK-ARMNODPS-NEXT: ldr r0, .LCPI2_0 +; CHECK-ARMNODPS-NEXT: mov r0, #0 +; CHECK-ARMNODPS-NEXT: sub r0, r0, #32768 ; CHECK-ARMNODPS-NEXT: cmn r1, #32768 ; CHECK-ARMNODPS-NEXT: movgt r0, r1 ; CHECK-ARMNODPS-NEXT: bx lr -; CHECK-ARMNODPS-NEXT: .p2align 2 -; CHECK-ARMNODPS-NEXT: @ %bb.1: -; CHECK-ARMNODPS-NEXT: .LCPI2_0: -; CHECK-ARMNODPS-NEXT: .long 4294934528 @ 0xffff8000 ; ; CHECK-ARMBASEDSP-LABEL: func16: ; CHECK-ARMBASEDSP: @ %bb.0: