diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1140,10 +1140,24 @@ case ARM::tLDRpci: case ARM::t2MOVi16: case ARM::t2MOVTi16: + case ARM::tMOVi8: + case ARM::tADDi8: + case ARM::tLSLri: // special cases: // 1) for Thumb1 code we sometimes materialize the constant via constpool // load. - // 2) for Thumb2 execute only code we materialize the constant via + // 2) for Thumb1 execute only code we materialize the constant via the + // following pattern: + // movs r3, #:upper8_15: + // lsls r3, #8 + // adds r3, #:upper0_7: + // lsls r3, #8 + // adds r3, #:lower8_15: + // lsls r3, #8 + // adds r3, #:lower0_7: + // So we need to special-case MOVS, ADDS and LSLS, and keep track of + // where we are in the sequence with the simplest of state machines. + // 3) for Thumb2 execute only code we materialize the constant via // immediate constants in 2 separate instructions (MOVW/MOVT). 
SrcReg = ~0U; DstReg = MI->getOperand(0).getReg(); @@ -1334,6 +1348,23 @@ Offset = MI->getOperand(2).getImm(); AFI->EHPrologueOffsetInRegs[DstReg] |= (Offset << 16); break; + case ARM::tMOVi8: + Offset = MI->getOperand(2).getImm(); + AFI->EHPrologueOffsetInRegs[DstReg] = Offset; + break; + case ARM::tLSLri: + assert(MI->getOperand(3).getImm() == 8 && + "The shift amount is not equal to 8"); + assert(MI->getOperand(2).getReg() == MI->getOperand(0).getReg() && + "The source register is not equal to the destination register"); + AFI->EHPrologueOffsetInRegs[DstReg] <<= 8; + break; + case ARM::tADDi8: + assert(MI->getOperand(2).getReg() == MI->getOperand(0).getReg() && + "The source register is not equal to the destination register"); + Offset = MI->getOperand(3).getImm(); + AFI->EHPrologueOffsetInRegs[DstReg] += Offset; + break; case ARM::t2PAC: case ARM::t2PACBTI: AFI->EHPrologueRemappedRegs[ARM::R12] = ARM::RA_AUTH_CODE; diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -81,8 +81,9 @@ MachineFunction &MF = *MBB.getParent(); const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>(); if (ST.genExecuteOnly()) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ScratchReg) - .addImm(NumBytes).setMIFlags(MIFlags); + unsigned XOInstr = ST.useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm; + BuildMI(MBB, MBBI, dl, TII.get(XOInstr), ScratchReg) + .addImm(NumBytes).setMIFlags(MIFlags); } else { MRI.emitLoadConstPool(MBB, MBBI, dl, ScratchReg, 0, NumBytes, ARMCC::AL, 0, MIFlags); diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp --- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -116,9 +116,10 @@ PredReg, MIFlags); } -/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize -/// a destreg = basereg + immediate in Thumb code. 
Materialize the immediate -/// in a register using mov / mvn sequences or load the immediate from a +/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize a +/// destreg = basereg + immediate in Thumb code. Materialize the immediate in a +/// register using mov / mvn (armv6-M >) sequences, movs / lsls / adds / lsls / +/// adds / lsls / adds sequences (armv6-M) or load the immediate from a /// constpool entry. static void emitThumbRegPlusImmInReg( MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, @@ -159,7 +160,8 @@ .addReg(LdReg, RegState::Kill) .setMIFlags(MIFlags); } else if (ST.genExecuteOnly()) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), LdReg) + unsigned XOInstr = ST.useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm; + BuildMI(MBB, MBBI, dl, TII.get(XOInstr), LdReg) .addImm(NumBytes).setMIFlags(MIFlags); } else MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes, ARMCC::AL, 0, diff --git a/llvm/test/CodeGen/ARM/large-stack.ll b/llvm/test/CodeGen/ARM/large-stack.ll --- a/llvm/test/CodeGen/ARM/large-stack.ll +++ b/llvm/test/CodeGen/ARM/large-stack.ll @@ -1,21 +1,54 @@ ; RUN: llc -mtriple=arm-eabi %s -o /dev/null -; RUN: llc -mtriple=thumbv6m-eabi -mattr=+execute-only %s -o - +; RUN: llc -mtriple=thumbv6m-eabi -mattr=+execute-only %s -o - -filetype=obj | \ +; RUN: llvm-objdump -d --no-leading-addr --no-show-raw-insn - | FileCheck %s define void @test1() { - %tmp = alloca [ 64 x i32 ] , align 4 +; CHECK-LABEL: <test1>: +;; are we using correct prologue immediate materialization pattern for +;; execute only +; CHECK: sub sp, #0x100 +%tmp = alloca [ 64 x i32 ] , align 4 ret void } define void @test2() { +; CHECK-LABEL: <test2>: +;; are we using correct prologue immediate materialization pattern for +;; execute-only +; CHECK: movs [[REG:r[0-9]+]], #0xff +; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8 +; CHECK-NEXT: adds [[REG]], #0xff +; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8 +; CHECK-NEXT: adds [[REG]], #0xef +; CHECK-NEXT: lsls [[REG]], 
[[REG]], #0x8 +; CHECK-NEXT: adds [[REG]], #0xb8 %tmp = alloca [ 4168 x i8 ] , align 4 ret void } define i32 @test3() { +;; are we using correct prologue immediate materialization pattern for +;; execute-only +; CHECK-LABEL: <test3>: +; CHECK: movs [[REG:r[0-9]+]], #0xcf +; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8 +; CHECK-NEXT: adds [[REG]], #0xff +; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8 +; CHECK-NEXT: adds [[REG]], #0xff +; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8 +; CHECK-NEXT: adds [[REG]], #0xf0 %retval = alloca i32, align 4 %tmp = alloca i32, align 4 - %a = alloca [805306369 x i8], align 16 + %a = alloca [u0x30000001 x i8], align 16 store i32 0, ptr %tmp +;; are we choosing correct store/tSTRspi pattern for execute-only +; CHECK: movs [[REG:r[0-9]+]], #0x30 +; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8 +; CHECK-NEXT: adds [[REG]], #0x0 +; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8 +; CHECK-NEXT: adds [[REG]], #0x0 +; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8 +; CHECK-NEXT: adds [[REG]], #0x8 %tmp1 = load i32, ptr %tmp ret i32 %tmp1 }