Index: llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp =================================================================== --- llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -972,113 +972,108 @@ return NewMO; } +static MachineOperand getMovOperand(const MachineOperand &MO, + unsigned TargetFlag) { + unsigned TF = MO.getTargetFlags() | TargetFlag; + switch (MO.getType()) { + case MachineOperand::MO_Immediate: { + unsigned Imm = MO.getImm(); + switch (TargetFlag) { + case ARMII::MO_HI_8_15: + Imm = (Imm >> 24) & 0xff; + break; + case ARMII::MO_HI_0_7: + Imm = (Imm >> 16) & 0xff; + break; + case ARMII::MO_LO_8_15: + Imm = (Imm >> 8) & 0xff; + break; + case ARMII::MO_LO_0_7: + Imm = Imm & 0xff; + break; + case ARMII::MO_HI16: + Imm = (Imm >> 16) & 0xffff; + break; + case ARMII::MO_LO16: + Imm = Imm & 0xffff; + break; + default: + llvm_unreachable("Only HI/LO target flags are expected"); + } + return MachineOperand::CreateImm(Imm); + } + case MachineOperand::MO_ExternalSymbol: + return MachineOperand::CreateES(MO.getSymbolName(), TF); + case MachineOperand::MO_JumpTableIndex: + return MachineOperand::CreateJTI(MO.getIndex(), TF); + default: + return MachineOperand::CreateGA(MO.getGlobal(), MO.getOffset(), TF); + } +} + void ARMExpandPseudo::ExpandTMOV32BitImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); const MachineOperand &MO = MI.getOperand(1); - MachineInstrBuilder Upper8_15, LSL_U8_15, Upper0_7, Lower8_15, Lower0_7; unsigned MIFlags = MI.getFlags(); LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump()); - Upper8_15 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tMOVi8), DstReg) - .add(t1CondCodeOp(true)); - - LSL_U8_15 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tLSLri), DstReg) - .add(t1CondCodeOp(true)) - .addReg(DstReg) - .addImm(8) - .add(predOps(ARMCC::AL)) - .setMIFlags(MIFlags); - - Upper0_7 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tADDi8), DstReg) - .add(t1CondCodeOp(true)) - .addReg(DstReg); - - MachineInstr *LSL_U0_7 = MBB.getParent()->CloneMachineInstr(LSL_U8_15); - MBB.insert(MBBI, LSL_U0_7); - - Lower8_15 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tADDi8), DstReg) - .add(t1CondCodeOp(true)) - .addReg(DstReg); - - MachineInstr *LSL_L8_15 = MBB.getParent()->CloneMachineInstr(LSL_U8_15); - MBB.insert(MBBI, LSL_L8_15); - - Lower0_7 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tADDi8)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .add(t1CondCodeOp(true)) - .addReg(DstReg); + // Expand the mov into a sequence of mov/add+lsl of the individual bytes. We + // want to avoid emitting any zero bytes, as they won't change the result, and + // also don't want any pointless shifts, so instead of immediately emitting + // the shift for a byte we keep track of how much we will need to shift and do + // it before the next nonzero byte. + unsigned PendingShift = 0; + for (unsigned Byte = 0; Byte < 4; ++Byte) { + unsigned Flag = Byte == 0 ? ARMII::MO_HI_8_15 + : Byte == 1 ? ARMII::MO_HI_0_7 + : Byte == 2 ? ARMII::MO_LO_8_15 + : ARMII::MO_LO_0_7; + MachineOperand Operand = getMovOperand(MO, Flag); + bool ZeroImm = Operand.isImm() && Operand.getImm() == 0; + unsigned Op = PendingShift ? ARM::tADDi8 : ARM::tMOVi8; + + // Emit the pending shift if we're going to emit this byte or if we've + // reached the end. + if (PendingShift && (!ZeroImm || Byte == 3)) { + MachineInstr *Lsl = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tLSLri), DstReg) + .add(t1CondCodeOp(true)) + .addReg(DstReg) + .addImm(PendingShift) + .add(predOps(ARMCC::AL)) + .setMIFlags(MIFlags); + LLVM_DEBUG(dbgs() << "And: "; Lsl->dump();); + PendingShift = 0; + } - Upper8_15.setMIFlags(MIFlags); - Upper0_7.setMIFlags(MIFlags); - Lower8_15.setMIFlags(MIFlags); - Lower0_7.setMIFlags(MIFlags); + // Emit this byte if it's nonzero. + if (!ZeroImm) { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Op), DstReg) + .add(t1CondCodeOp(true)); + if (Op == ARM::tADDi8) + MIB.addReg(DstReg); + MIB.add(Operand); + MIB.add(predOps(ARMCC::AL)); + MIB.setMIFlags(MIFlags); + LLVM_DEBUG(dbgs() << (Op == ARM::tMOVi8 ? "To: " : "And:") << " "; + MIB.getInstr()->dump();); + } - switch (MO.getType()) { - case MachineOperand::MO_Immediate: { - unsigned Imm = MO.getImm(); - unsigned Hi8_15 = (Imm >> 24) & 0xff; - unsigned Hi0_7 = (Imm >> 16) & 0xff; - unsigned Lo8_15 = (Imm >> 8) & 0xff; - unsigned Lo0_7 = Imm & 0xff; - Upper8_15 = Upper8_15.addImm(Hi8_15); - Upper0_7 = Upper0_7.addImm(Hi0_7); - Lower8_15 = Lower8_15.addImm(Lo8_15); - Lower0_7 = Lower0_7.addImm(Lo0_7); - break; - } - case MachineOperand::MO_ExternalSymbol: { - const char *ES = MO.getSymbolName(); - unsigned TF = MO.getTargetFlags(); - Upper8_15 = Upper8_15.addExternalSymbol(ES, TF | ARMII::MO_HI_8_15); - Upper0_7 = Upper0_7.addExternalSymbol(ES, TF | ARMII::MO_HI_0_7); - Lower8_15 = Lower8_15.addExternalSymbol(ES, TF | ARMII::MO_LO_8_15); - Lower0_7 = Lower0_7.addExternalSymbol(ES, TF | ARMII::MO_LO_0_7); - break; - } - case MachineOperand::MO_JumpTableIndex: { - unsigned Idx = MO.getIndex(); - unsigned TF = MO.getTargetFlags(); - Upper8_15 = Upper8_15.addJumpTableIndex(Idx, TF | ARMII::MO_HI_8_15); - Upper0_7 = Upper0_7.addJumpTableIndex(Idx, TF | ARMII::MO_HI_0_7); - Lower8_15 = Lower8_15.addJumpTableIndex(Idx, TF | ARMII::MO_LO_8_15); - Lower0_7 = Lower0_7.addJumpTableIndex(Idx, TF | ARMII::MO_LO_0_7); - break; - } - default: { - const GlobalValue *GV = MO.getGlobal(); - unsigned TF = MO.getTargetFlags(); - Upper8_15 = - Upper8_15.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI_8_15); - Upper0_7 = - Upper0_7.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI_0_7); - Lower8_15 = - Lower8_15.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO_8_15); - Lower0_7 = - Lower0_7.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO_0_7); - break; - } + // Don't accumulate the shift value if we've not yet seen a nonzero byte. + if (PendingShift || !ZeroImm) + PendingShift += 8; } - Upper8_15 = Upper8_15.add(predOps(ARMCC::AL)); - Upper0_7 = Upper0_7.add(predOps(ARMCC::AL)); - Lower8_15 = Lower8_15.add(predOps(ARMCC::AL)); - Lower0_7 = Lower0_7.add(predOps(ARMCC::AL)); + // The dest is dead on the last instruction we emitted if it was dead on the + // original instruction. + (--MBBI)->getOperand(0).setIsDead(DstIsDead); MI.eraseFromParent(); - LLVM_DEBUG(dbgs() << "To: "; Upper8_15.getInstr()->dump();); - LLVM_DEBUG(dbgs() << "And: "; LSL_U8_15.getInstr()->dump();); - LLVM_DEBUG(dbgs() << "And: "; Upper0_7.getInstr()->dump();); - LLVM_DEBUG(dbgs() << "And: "; LSL_U0_7->dump();); - LLVM_DEBUG(dbgs() << "And: "; Lower8_15.getInstr()->dump();); - LLVM_DEBUG(dbgs() << "And: "; LSL_L8_15->dump();); - LLVM_DEBUG(dbgs() << "And: "; Lower0_7.getInstr()->dump();); } void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, @@ -1149,52 +1144,35 @@ } LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg); - HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg); - LO16.setMIFlags(MIFlags); - HI16.setMIFlags(MIFlags); - - switch (MO.getType()) { - case MachineOperand::MO_Immediate: { - unsigned Imm = MO.getImm(); - unsigned Lo16 = Imm & 0xffff; - unsigned Hi16 = (Imm >> 16) & 0xffff; - LO16 = LO16.addImm(Lo16); - HI16 = HI16.addImm(Hi16); - break; - } - case MachineOperand::MO_ExternalSymbol: { - const char *ES = MO.getSymbolName(); - unsigned TF = MO.getTargetFlags(); - LO16 = LO16.addExternalSymbol(ES, TF | ARMII::MO_LO16); - HI16 = HI16.addExternalSymbol(ES, TF | ARMII::MO_HI16); - break; - } - default: { - const GlobalValue *GV = MO.getGlobal(); - unsigned TF = MO.getTargetFlags(); - LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); - HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); - break; - } - } - + LO16.add(getMovOperand(MO, ARMII::MO_LO16)); LO16.cloneMemRefs(MI); - HI16.cloneMemRefs(MI); LO16.addImm(Pred).addReg(PredReg); - HI16.addImm(Pred).addReg(PredReg); + if (isCC) + LO16.add(makeImplicit(MI.getOperand(1))); + LLVM_DEBUG(dbgs() << "To: "; LO16.getInstr()->dump();); + + MachineOperand HIOperand = getMovOperand(MO, ARMII::MO_HI16); + if (!(HIOperand.isImm() && HIOperand.getImm() == 0)) { + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg); + HI16.setMIFlags(MIFlags); + HI16.add(HIOperand); + HI16.cloneMemRefs(MI); + HI16.addImm(Pred).addReg(PredReg); + LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump();); + } else { + LO16->getOperand(0).setIsDead(DstIsDead); + } if (RequiresBundling) finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator()); - if (isCC) - LO16.add(makeImplicit(MI.getOperand(1))); - TransferImpOps(MI, LO16, HI16); + assert(MI.getNumImplicitOperands() == 0 && + "MOVi32imm expected to have no implicit operands"); + MI.eraseFromParent(); - LLVM_DEBUG(dbgs() << "To: "; LO16.getInstr()->dump();); - LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump();); } // The size of the area, accessed by that VLSTM/VLLDM Index: llvm/test/CodeGen/ARM/execute-only-big-stack-frame.ll =================================================================== --- llvm/test/CodeGen/ARM/execute-only-big-stack-frame.ll +++ llvm/test/CodeGen/ARM/execute-only-big-stack-frame.ll @@ -26,11 +26,9 @@ ; CHECK-MOVW-MOVT-ADD: add sp, [[REG1]] ; CHECK-MOVW-MOVT-ADD-NOT: ldr {{r[0-9]+}}, .{{.*}} ; CHECK-MOVW-MOVT-ADD: movw [[REG2:r[0-9]+]], #65532 -; CHECK-MOVW-MOVT-ADD: movt [[REG2]], #0 ; CHECK-MOVW-MOVT-ADD: add [[REG2]], sp ; CHECK-MOVW-MOVT-ADD-NOT: ldr {{r[0-9]+}}, .{{.*}} ; CHECK-MOVW-MOVT-ADD: movw [[REG3:r[0-9]+]], #65532 -; CHECK-MOVW-MOVT-ADD: movt [[REG3]], #0 ; CHECK-MOVW-MOVT-ADD: add [[REG3]], sp ; CHECK-MOVW-MOVT-ADD-NOT: ldr {{r[0-9]+}}, .{{.*}} ; CHECK-MOVW-MOVT-ADD: movw [[REG4:r[0-9]+]], #0 Index: llvm/test/CodeGen/ARM/execute-only.ll =================================================================== --- llvm/test/CodeGen/ARM/execute-only.ll +++ llvm/test/CodeGen/ARM/execute-only.ll @@ -142,3 +142,68 @@ %v = load i32, ptr @external_global ret i32 %v } + +define i32 @test_imm() { +entry: +; CHECK-LABEL: test_imm: +; CHECK: movw [[IMMDEST:r[0-9]+]], #13124 +; CHECK-NEXT: movt [[IMMDEST]], #4386 +; CHECK-NEXT: bx lr +; CHECK-T1-LABEL: test_imm: +; CHECK-T1: movs [[IMMDEST:r[0-9]+]], #17 +; CHECK-T1-NEXT: lsls [[IMMDEST]], [[IMMDEST]], #8 +; CHECK-T1-NEXT: adds [[IMMDEST]], #34 +; CHECK-T1-NEXT: lsls [[IMMDEST]], [[IMMDEST]], #8 +; CHECK-T1-NEXT: adds [[IMMDEST]], #51 +; CHECK-T1-NEXT: lsls [[IMMDEST]], [[IMMDEST]], #8 +; CHECK-T1-NEXT: adds [[IMMDEST]], #68 +; CHECK-T1-NEXT: bx lr + + ret i32 287454020 +} + +define i32 @test_imm_high_half() { +entry: +; CHECK-LABEL: test_imm_high_half: +; CHECK-T2BASE: movw [[IMMDEST:r[0-9]+]], #0 +; CHECK-T2: movs [[IMMDEST:r[0-9]+]], #0 +; CHECK-NEXT: movt [[IMMDEST]], #4386 +; CHECK-NEXT: bx lr +; CHECK-T1-LABEL: test_imm_high_half: +; CHECK-T1: movs [[IMMDEST:r[0-9]+]], #17 +; CHECK-T1-NEXT: lsls [[IMMDEST]], [[IMMDEST]], #8 +; CHECK-T1-NEXT: adds [[IMMDEST]], #34 +; CHECK-T1-NEXT: lsls [[IMMDEST]], [[IMMDEST]], #16 +; CHECK-T1-NEXT: bx lr + + ret i32 287440896 +} + +define i32 @test_imm_low_half() { +; CHECK-LABEL: test_imm_low_half: +; CHECK: movw [[IMMDEST:r[0-9]+]], #13124 +; CHECK-NEXT: bx lr +; CHECK-T1-LABEL: test_imm_low_half: +; CHECK-T1: movs [[IMMDEST]], #51 +; CHECK-T1-NEXT: lsls [[IMMDEST]], [[IMMDEST]], #8 +; CHECK-T1-NEXT: adds [[IMMDEST]], #68 +; CHECK-T1-NEXT: bx lr + +entry: + ret i32 13124 +} + +define i32 @test_imm_middle_bytes() { +; CHECK-LABEL: test_imm_middle_bytes: +; CHECK: movw [[IMMDEST:r[0-9]+]], #13056 +; CHECK-NEXT: movt [[IMMDEST]], #34 +; CHECK-NEXT: bx lr +; CHECK-T1-LABEL: test_imm_middle_bytes: +; CHECK-T1: movs [[IMMDEST]], #34 +; CHECK-T1-NEXT: lsls [[IMMDEST]], [[IMMDEST]], #8 +; CHECK-T1-NEXT: adds [[IMMDEST]], #51 +; CHECK-T1-NEXT: lsls [[IMMDEST]], [[IMMDEST]], #8 +; CHECK-T1-NEXT: bx lr + + ret i32 2241280 +} Index: llvm/test/CodeGen/ARM/large-stack.ll =================================================================== --- llvm/test/CodeGen/ARM/large-stack.ll +++ llvm/test/CodeGen/ARM/large-stack.ll @@ -43,11 +43,7 @@ store i32 0, ptr %tmp ;; are we choosing correct store/tSTRspi pattern for execute-only ; CHECK: movs [[REG:r[0-9]+]], #0x30 -; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8 -; CHECK-NEXT: adds [[REG]], #0x0 -; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8 -; CHECK-NEXT: adds [[REG]], #0x0 -; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8 +; CHECK-NEXT: lsls [[REG]], [[REG]], #0x18 ; CHECK-NEXT: adds [[REG]], #0x8 %tmp1 = load i32, ptr %tmp ret i32 %tmp1 Index: llvm/test/CodeGen/Thumb2/segmented-stacks.ll =================================================================== --- llvm/test/CodeGen/Thumb2/segmented-stacks.ll +++ llvm/test/CodeGen/Thumb2/segmented-stacks.ll @@ -75,9 +75,8 @@ ; THUMB-LABEL: test_large: ; THUMB: push {r4, r5} -; THUMB-NEXT: movw r4, #40192 ; THUMB-NEXT: mov r5, sp -; THUMB-NEXT: movt r4, #0 +; THUMB-NEXT: movw r4, #40192 ; THUMB-NEXT: sub r5, r5, r4 ; THUMB-NEXT: mrc p15, #0, r4, c13, c0, #3 ; THUMB-NEXT: ldr.w r4, [r4, #252] @@ -85,7 +84,6 @@ ; THUMB-NEXT: bls .LBB1_2 ; THUMB: movw r4, #40192 -; THUMB-NEXT: movt r4, #0 ; THUMB-NEXT: mov r5, #0 ; THUMB-NEXT: push {lr} ; THUMB-NEXT: bl __morestack @@ -129,9 +127,8 @@ ; THUMB-LABEL: test_fastcc_large: ; THUMB: push {r4, r5} -; THUMB-NEXT: movw r4, #40192 ; THUMB-NEXT: mov r5, sp -; THUMB-NEXT: movt r4, #0 +; THUMB-NEXT: movw r4, #40192 ; THUMB-NEXT: sub r5, r5, r4 ; THUMB-NEXT: mrc p15, #0, r4, c13, c0, #3 ; THUMB-NEXT: ldr.w r4, [r4, #252] @@ -139,7 +136,6 @@ ; THUMB-NEXT: bls .LBB2_2 ; THUMB: movw r4, #40192 -; THUMB-NEXT: movt r4, #0 ; THUMB-NEXT: mov r5, #0 ; THUMB-NEXT: push {lr} ; THUMB-NEXT: bl __morestack