diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -972,109 +972,113 @@ return NewMO; } -static MachineOperand getMovOperand(const MachineOperand &MO, - unsigned TargetFlag) { - unsigned TF = MO.getTargetFlags() | TargetFlag; - switch (MO.getType()) { - case MachineOperand::MO_Immediate: { - unsigned Imm = MO.getImm(); - switch (TargetFlag) { - case ARMII::MO_HI_8_15: - Imm = (Imm >> 24) & 0xff; - break; - case ARMII::MO_HI_0_7: - Imm = (Imm >> 16) & 0xff; - break; - case ARMII::MO_LO_8_15: - Imm = (Imm >> 8) & 0xff; - break; - case ARMII::MO_LO_0_7: - Imm = Imm & 0xff; - break; - case ARMII::MO_HI16: - Imm = (Imm >> 16) & 0xffff; - break; - case ARMII::MO_LO16: - Imm = Imm & 0xffff; - break; - default: - llvm_unreachable("Only HI/LO target flags are expected"); - } - return MachineOperand::CreateImm(Imm); - } - case MachineOperand::MO_ExternalSymbol: - return MachineOperand::CreateES(MO.getSymbolName(), TF); - case MachineOperand::MO_JumpTableIndex: - return MachineOperand::CreateJTI(MO.getIndex(), TF); - default: - return MachineOperand::CreateGA(MO.getGlobal(), MO.getOffset(), TF); - } -} - void ARMExpandPseudo::ExpandTMOV32BitImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); const MachineOperand &MO = MI.getOperand(1); + MachineInstrBuilder Upper8_15, LSL_U8_15, Upper0_7, Lower8_15, Lower0_7; unsigned MIFlags = MI.getFlags(); LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump()); - // Expand the mov into a sequence of mov/add+lsl of the individual bytes. We - // want to avoid emitting any zero bytes, as they won't change the result, and - // also don't want any pointless shifts, so instead of immediately emitting - // the shift for a byte we keep track of how much we will need to shift and do - // it before the next nonzero byte. - unsigned PendingShift = 0; - for (unsigned Byte = 0; Byte < 4; ++Byte) { - unsigned Flag = Byte == 0 ? ARMII::MO_HI_8_15 - : Byte == 1 ? ARMII::MO_HI_0_7 - : Byte == 2 ? ARMII::MO_LO_8_15 - : ARMII::MO_LO_0_7; - MachineOperand Operand = getMovOperand(MO, Flag); - bool ZeroImm = Operand.isImm() && Operand.getImm() == 0; - unsigned Op = PendingShift ? ARM::tADDi8 : ARM::tMOVi8; - - // Emit the pending shift if we're going to emit this byte or if we've - // reached the end. - if (PendingShift && (!ZeroImm || Byte == 3)) { - MachineInstr *Lsl = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tLSLri), DstReg) - .add(t1CondCodeOp(true)) - .addReg(DstReg) - .addImm(PendingShift) - .add(predOps(ARMCC::AL)) - .setMIFlags(MIFlags); - (void)Lsl; - LLVM_DEBUG(dbgs() << "And: "; Lsl->dump();); - PendingShift = 0; - } + Upper8_15 = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tMOVi8), DstReg) + .add(t1CondCodeOp(true)); - // Emit this byte if it's nonzero. - if (!ZeroImm) { - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Op), DstReg) - .add(t1CondCodeOp(true)); - if (Op == ARM::tADDi8) - MIB.addReg(DstReg); - MIB.add(Operand); - MIB.add(predOps(ARMCC::AL)); - MIB.setMIFlags(MIFlags); - LLVM_DEBUG(dbgs() << (Op == ARM::tMOVi8 ? "To: " : "And:") << " "; - MIB.getInstr()->dump();); - } + LSL_U8_15 = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tLSLri), DstReg) + .add(t1CondCodeOp(true)) + .addReg(DstReg) + .addImm(8) + .add(predOps(ARMCC::AL)) + .setMIFlags(MIFlags); - // Don't accumulate the shift value if we've not yet seen a nonzero byte. - if (PendingShift || !ZeroImm) - PendingShift += 8; + Upper0_7 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tADDi8), DstReg) + .add(t1CondCodeOp(true)) + .addReg(DstReg); + + MachineInstr *LSL_U0_7 = MBB.getParent()->CloneMachineInstr(LSL_U8_15); + MBB.insert(MBBI, LSL_U0_7); + + Lower8_15 = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tADDi8), DstReg) + .add(t1CondCodeOp(true)) + .addReg(DstReg); + + MachineInstr *LSL_L8_15 = MBB.getParent()->CloneMachineInstr(LSL_U8_15); + MBB.insert(MBBI, LSL_L8_15); + + Lower0_7 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tADDi8)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .add(t1CondCodeOp(true)) + .addReg(DstReg); + + Upper8_15.setMIFlags(MIFlags); + Upper0_7.setMIFlags(MIFlags); + Lower8_15.setMIFlags(MIFlags); + Lower0_7.setMIFlags(MIFlags); + + switch (MO.getType()) { + case MachineOperand::MO_Immediate: { + unsigned Imm = MO.getImm(); + unsigned Hi8_15 = (Imm >> 24) & 0xff; + unsigned Hi0_7 = (Imm >> 16) & 0xff; + unsigned Lo8_15 = (Imm >> 8) & 0xff; + unsigned Lo0_7 = Imm & 0xff; + Upper8_15 = Upper8_15.addImm(Hi8_15); + Upper0_7 = Upper0_7.addImm(Hi0_7); + Lower8_15 = Lower8_15.addImm(Lo8_15); + Lower0_7 = Lower0_7.addImm(Lo0_7); + break; + } + case MachineOperand::MO_ExternalSymbol: { + const char *ES = MO.getSymbolName(); + unsigned TF = MO.getTargetFlags(); + Upper8_15 = Upper8_15.addExternalSymbol(ES, TF | ARMII::MO_HI_8_15); + Upper0_7 = Upper0_7.addExternalSymbol(ES, TF | ARMII::MO_HI_0_7); + Lower8_15 = Lower8_15.addExternalSymbol(ES, TF | ARMII::MO_LO_8_15); + Lower0_7 = Lower0_7.addExternalSymbol(ES, TF | ARMII::MO_LO_0_7); + break; + } + case MachineOperand::MO_JumpTableIndex: { + unsigned Idx = MO.getIndex(); + unsigned TF = MO.getTargetFlags(); + Upper8_15 = Upper8_15.addJumpTableIndex(Idx, TF | ARMII::MO_HI_8_15); + Upper0_7 = Upper0_7.addJumpTableIndex(Idx, TF | ARMII::MO_HI_0_7); + Lower8_15 = Lower8_15.addJumpTableIndex(Idx, TF | ARMII::MO_LO_8_15); + Lower0_7 = Lower0_7.addJumpTableIndex(Idx, TF | ARMII::MO_LO_0_7); + break; + } + default: { + const GlobalValue *GV = MO.getGlobal(); + unsigned TF = MO.getTargetFlags(); + Upper8_15 = + Upper8_15.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI_8_15); + Upper0_7 = + Upper0_7.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI_0_7); + Lower8_15 = + Lower8_15.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO_8_15); + Lower0_7 = + Lower0_7.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO_0_7); + break; + } } - // The dest is dead on the last instruction we emitted if it was dead on the - // original instruction. - (--MBBI)->getOperand(0).setIsDead(DstIsDead); + Upper8_15 = Upper8_15.add(predOps(ARMCC::AL)); + Upper0_7 = Upper0_7.add(predOps(ARMCC::AL)); + Lower8_15 = Lower8_15.add(predOps(ARMCC::AL)); + Lower0_7 = Lower0_7.add(predOps(ARMCC::AL)); MI.eraseFromParent(); + LLVM_DEBUG(dbgs() << "To: "; Upper8_15.getInstr()->dump();); + LLVM_DEBUG(dbgs() << "And: "; LSL_U8_15.getInstr()->dump();); + LLVM_DEBUG(dbgs() << "And: "; Upper0_7.getInstr()->dump();); + LLVM_DEBUG(dbgs() << "And: "; LSL_U0_7->dump();); + LLVM_DEBUG(dbgs() << "And: "; Lower8_15.getInstr()->dump();); + LLVM_DEBUG(dbgs() << "And: "; LSL_L8_15->dump();); + LLVM_DEBUG(dbgs() << "And: "; Lower0_7.getInstr()->dump();); } void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, @@ -1145,35 +1149,52 @@ } LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg); + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg); + LO16.setMIFlags(MIFlags); - LO16.add(getMovOperand(MO, ARMII::MO_LO16)); - LO16.cloneMemRefs(MI); - LO16.addImm(Pred).addReg(PredReg); - if (isCC) - LO16.add(makeImplicit(MI.getOperand(1))); - LLVM_DEBUG(dbgs() << "To: "; LO16.getInstr()->dump();); + HI16.setMIFlags(MIFlags); - MachineOperand HIOperand = getMovOperand(MO, ARMII::MO_HI16); - if (!(HIOperand.isImm() && HIOperand.getImm() == 0)) { - HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg); - HI16.setMIFlags(MIFlags); - HI16.add(HIOperand); - HI16.cloneMemRefs(MI); - HI16.addImm(Pred).addReg(PredReg); - LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump();); - } else { - LO16->getOperand(0).setIsDead(DstIsDead); + switch (MO.getType()) { + case MachineOperand::MO_Immediate: { + unsigned Imm = MO.getImm(); + unsigned Lo16 = Imm & 0xffff; + unsigned Hi16 = (Imm >> 16) & 0xffff; + LO16 = LO16.addImm(Lo16); + HI16 = HI16.addImm(Hi16); + break; + } + case MachineOperand::MO_ExternalSymbol: { + const char *ES = MO.getSymbolName(); + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addExternalSymbol(ES, TF | ARMII::MO_LO16); + HI16 = HI16.addExternalSymbol(ES, TF | ARMII::MO_HI16); + break; + } + default: { + const GlobalValue *GV = MO.getGlobal(); + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); + HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); + break; + } } + LO16.cloneMemRefs(MI); + HI16.cloneMemRefs(MI); + LO16.addImm(Pred).addReg(PredReg); + HI16.addImm(Pred).addReg(PredReg); + if (RequiresBundling) finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator()); - assert(MI.getNumImplicitOperands() == 0 && - "MOVi32imm expected to have no implicit operands"); - + if (isCC) + LO16.add(makeImplicit(MI.getOperand(1))); + TransferImpOps(MI, LO16, HI16); MI.eraseFromParent(); + LLVM_DEBUG(dbgs() << "To: "; LO16.getInstr()->dump();); + LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump();); } // The size of the area, accessed by that VLSTM/VLLDM diff --git a/llvm/test/CodeGen/ARM/execute-only-big-stack-frame.ll b/llvm/test/CodeGen/ARM/execute-only-big-stack-frame.ll --- a/llvm/test/CodeGen/ARM/execute-only-big-stack-frame.ll +++ b/llvm/test/CodeGen/ARM/execute-only-big-stack-frame.ll @@ -26,9 +26,11 @@ ; CHECK-MOVW-MOVT-ADD: add sp, [[REG1]] ; CHECK-MOVW-MOVT-ADD-NOT: ldr {{r[0-9]+}}, .{{.*}} ; CHECK-MOVW-MOVT-ADD: movw [[REG2:r[0-9]+]], #65532 +; CHECK-MOVW-MOVT-ADD: movt [[REG2]], #0 ; CHECK-MOVW-MOVT-ADD: add [[REG2]], sp ; CHECK-MOVW-MOVT-ADD-NOT: ldr {{r[0-9]+}}, .{{.*}} ; CHECK-MOVW-MOVT-ADD: movw [[REG3:r[0-9]+]], #65532 +; CHECK-MOVW-MOVT-ADD: movt [[REG3]], #0 ; CHECK-MOVW-MOVT-ADD: add [[REG3]], sp ; CHECK-MOVW-MOVT-ADD-NOT: ldr {{r[0-9]+}}, .{{.*}} ; CHECK-MOVW-MOVT-ADD: movw [[REG4:r[0-9]+]], #0 diff --git a/llvm/test/CodeGen/ARM/execute-only.ll b/llvm/test/CodeGen/ARM/execute-only.ll --- a/llvm/test/CodeGen/ARM/execute-only.ll +++ b/llvm/test/CodeGen/ARM/execute-only.ll @@ -142,89 +142,3 @@ %v = load i32, ptr @external_global ret i32 %v } - -define i32 @test_imm() { -entry: -; CHECK-LABEL: test_imm: -; CHECK: movw [[IMMDEST:r[0-9]+]], #13124 -; CHECK-NEXT: movt [[IMMDEST]], #4386 -; CHECK-NEXT: bx lr -; CHECK-T1-LABEL: test_imm: -; CHECK-T1: movs [[IMMDEST:r[0-9]+]], #17 -; CHECK-T1-NEXT: lsls [[IMMDEST]], [[IMMDEST]], #8 -; CHECK-T1-NEXT: adds [[IMMDEST]], #34 -; CHECK-T1-NEXT: lsls [[IMMDEST]], [[IMMDEST]], #8 -; CHECK-T1-NEXT: adds [[IMMDEST]], #51 -; CHECK-T1-NEXT: lsls [[IMMDEST]], [[IMMDEST]], #8 -; CHECK-T1-NEXT: adds [[IMMDEST]], #68 -; CHECK-T1-NEXT: bx lr - - ret i32 u0x11223344 -} - -define i32 @test_imm_high_half() { -entry: -; CHECK-LABEL: test_imm_high_half: -; CHECK-T2BASE: movw [[IMMDEST:r[0-9]+]], #0 -; CHECK-T2: movs [[IMMDEST:r[0-9]+]], #0 -; CHECK-NEXT: movt [[IMMDEST]], #4386 -; CHECK-NEXT: bx lr -; CHECK-T1-LABEL: test_imm_high_half: -; CHECK-T1: movs [[IMMDEST:r[0-9]+]], #17 -; CHECK-T1-NEXT: lsls [[IMMDEST]], [[IMMDEST]], #8 -; CHECK-T1-NEXT: adds [[IMMDEST]], #34 -; CHECK-T1-NEXT: lsls [[IMMDEST]], [[IMMDEST]], #16 -; CHECK-T1-NEXT: bx lr - - ret i32 u0x11220000 -} - -define i32 @test_imm_low_half() { -; CHECK-LABEL: test_imm_low_half: -; CHECK: movw [[IMMDEST:r[0-9]+]], #13124 -; CHECK-NEXT: bx lr -; CHECK-T1-LABEL: test_imm_low_half: -; CHECK-T1: movs [[IMMDEST]], #51 -; CHECK-T1-NEXT: lsls [[IMMDEST]], [[IMMDEST]], #8 -; CHECK-T1-NEXT: adds [[IMMDEST]], #68 -; CHECK-T1-NEXT: bx lr - -entry: - ret i32 u0x3344 -} - -define i32 @test_imm_middle_bytes() { -; CHECK-LABEL: test_imm_middle_bytes: -; CHECK: movw [[IMMDEST:r[0-9]+]], #13056 -; CHECK-NEXT: movt [[IMMDEST]], #34 -; CHECK-NEXT: bx lr -; CHECK-T1-LABEL: test_imm_middle_bytes: -; CHECK-T1: movs [[IMMDEST]], #34 -; CHECK-T1-NEXT: lsls [[IMMDEST]], [[IMMDEST]], #8 -; CHECK-T1-NEXT: adds [[IMMDEST]], #51 -; CHECK-T1-NEXT: lsls [[IMMDEST]], [[IMMDEST]], #8 -; CHECK-T1-NEXT: bx lr - - ret i32 u0x223300 -} - -; This struct is sized so that the byval call does an inline memcpy of -; 0x10001 bytes. -%struct.struct_t = type { [65553 x i8] } -@byval_arg = global %struct.struct_t zeroinitializer -declare void @byval_fn(ptr byval(%struct.struct_t)) - -define void @test_byval_call() { -entry: -; CHECK-LABEL: test_byval_call: -; CHECK-T2BASE: movw [[BYVAL_CPYSIZE:r[0-9]+]], #1 -; CHECK-T2: movs [[BYVAL_CPYSIZE:r[0-9]+]], #1 -; CHECK: movt [[BYVAL_CPYSIZE]], #1 -; CHECK-T1-LABEL: test_byval_call: -; CHECK-T1: movs [[BYVAL_CPYSIZE:r[0-9]+]], #1 -; CHECK-T1: lsls [[BYVAL_CPYSIZE]], [[BYVAL_CPYSIZE]], #16 -; CHECK-T1: adds [[BYVAL_CPYSIZE]], #1 - - call void @byval_fn(ptr byval(%struct.struct_t) @byval_arg) - ret void -} diff --git a/llvm/test/CodeGen/ARM/large-stack.ll b/llvm/test/CodeGen/ARM/large-stack.ll --- a/llvm/test/CodeGen/ARM/large-stack.ll +++ b/llvm/test/CodeGen/ARM/large-stack.ll @@ -43,7 +43,11 @@ store i32 0, ptr %tmp ;; are we choosing correct store/tSTRspi pattern for execute-only ; CHECK: movs [[REG:r[0-9]+]], #0x30 -; CHECK-NEXT: lsls [[REG]], [[REG]], #0x18 +; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8 +; CHECK-NEXT: adds [[REG]], #0x0 +; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8 +; CHECK-NEXT: adds [[REG]], #0x0 +; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8 ; CHECK-NEXT: adds [[REG]], #0x8 %tmp1 = load i32, ptr %tmp ret i32 %tmp1 diff --git a/llvm/test/CodeGen/Thumb2/segmented-stacks.ll b/llvm/test/CodeGen/Thumb2/segmented-stacks.ll --- a/llvm/test/CodeGen/Thumb2/segmented-stacks.ll +++ b/llvm/test/CodeGen/Thumb2/segmented-stacks.ll @@ -75,8 +75,9 @@ ; THUMB-LABEL: test_large: ; THUMB: push {r4, r5} -; THUMB-NEXT: mov r5, sp ; THUMB-NEXT: movw r4, #40192 +; THUMB-NEXT: mov r5, sp +; THUMB-NEXT: movt r4, #0 ; THUMB-NEXT: sub r5, r5, r4 ; THUMB-NEXT: mrc p15, #0, r4, c13, c0, #3 ; THUMB-NEXT: ldr.w r4, [r4, #252] @@ -84,6 +85,7 @@ ; THUMB-NEXT: bls .LBB1_2 ; THUMB: movw r4, #40192 +; THUMB-NEXT: movt r4, #0 ; THUMB-NEXT: mov r5, #0 ; THUMB-NEXT: push {lr} ; THUMB-NEXT: bl __morestack @@ -127,8 +129,9 @@ ; THUMB-LABEL: test_fastcc_large: ; THUMB: push {r4, r5} -; THUMB-NEXT: mov r5, sp ; THUMB-NEXT: movw r4, #40192 +; THUMB-NEXT: mov r5, sp +; THUMB-NEXT: movt r4, #0 ; THUMB-NEXT: sub r5, r5, r4 ; THUMB-NEXT: mrc p15, #0, r4, c13, c0, #3 ; THUMB-NEXT: ldr.w r4, [r4, #252] @@ -136,6 +139,7 @@ ; THUMB-NEXT: bls .LBB2_2 ; THUMB: movw r4, #40192 +; THUMB-NEXT: movt r4, #0 ; THUMB-NEXT: mov r5, #0 ; THUMB-NEXT: push {lr} ; THUMB-NEXT: bl __morestack