diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -199,6 +199,15 @@ O << ":lower16:"; else if (TF & ARMII::MO_HI16) O << ":upper16:"; + else if (TF & ARMII::MO_LO_0_7) + O << ":lower0_7:"; + else if (TF & ARMII::MO_LO_8_15) + O << ":lower8_15:"; + else if (TF & ARMII::MO_HI_0_7) + O << ":upper0_7:"; + else if (TF & ARMII::MO_HI_8_15) + O << ":upper8_15:"; + GetARMGVSymbol(MO.getGlobal(), TF)->print(O, MAI); printOffset(MO.getOffset(), O); } @@ -228,6 +237,14 @@ O << ":lower16:"; else if (TF == ARMII::MO_HI16) O << ":upper16:"; + else if (TF == ARMII::MO_LO_0_7) + O << ":lower0_7:"; + else if (TF == ARMII::MO_LO_8_15) + O << ":lower8_15:"; + else if (TF == ARMII::MO_HI_0_7) + O << ":upper0_7:"; + else if (TF == ARMII::MO_HI_8_15) + O << ":upper8_15:"; O << MO.getImm(); break; } diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -3327,7 +3327,7 @@ MachineRegisterInfo *MRI) const { // Fold large immediates into add, sub, or, xor. 
 unsigned DefOpc = DefMI.getOpcode();
-  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
+  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm) // TODO: also handle ARM::t1MOVi32imm
     return false;
   if (!DefMI.getOperand(1).isImm())
     // Could be t2MOVi32imm @xx
@@ -5538,7 +5538,10 @@
   using namespace ARMII;
   static const std::pair TargetFlags[] = {
-      {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"}};
+      {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"},
+      {MO_LO_0_7, "arm-lo-0-7"}, {MO_HI_0_7, "arm-hi-0-7"},
+      {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
+  };
   return ArrayRef(TargetFlags);
 }
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -71,6 +71,8 @@
     void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
                     unsigned Opc, bool IsExt);
     void ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI);
+    void ExpandT1MOV32BitImm(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator &MBBI);
     void ExpandMOV32BitImm(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator &MBBI);
     void CMSEClearGPRegs(MachineBasicBlock &MBB,
@@ -969,6 +971,99 @@
   return NewMO;
 }
 
+void ARMExpandPseudo::ExpandT1MOV32BitImm(MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator &MBBI) {
+  MachineInstr &MI = *MBBI;
+  Register DstReg = MI.getOperand(0).getReg();
+  bool DstIsDead = MI.getOperand(0).isDead();
+  const MachineOperand &MO = MI.getOperand(1);
+  MachineInstrBuilder Upper8_15, LSL_U8_15, Upper0_7, Lower8_15, Lower0_7;
+  unsigned MIFlags = MI.getFlags();
+
+  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
+
+  Upper8_15 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tMOVi8), DstReg)
+      .addReg(ARM::CPSR, RegState::Kill);
+
+  LSL_U8_15 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tLSLri), DstReg)
+      .addReg(ARM::CPSR, RegState::Kill)
+      .addReg(DstReg)
+      .addImm(8)
+      .add(predOps(ARMCC::AL))
+      .setMIFlags(MIFlags);
+
+  Upper0_7 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tADDi8), DstReg)
+      .addReg(ARM::CPSR, RegState::Kill)
+      .addReg(DstReg);
+
+  MachineInstr *LSL_U0_7 = MBB.getParent()->CloneMachineInstr(LSL_U8_15);
+  MBB.insert(MBBI, LSL_U0_7);
+
+  Lower8_15 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tADDi8), DstReg)
+      .addReg(ARM::CPSR, RegState::Kill)
+      .addReg(DstReg);
+
+  MachineInstr *LSL_L8_15 = MBB.getParent()->CloneMachineInstr(LSL_U8_15);
+  MBB.insert(MBBI, LSL_L8_15);
+
+  Lower0_7 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tADDi8))
+      .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(ARM::CPSR, RegState::Kill)
+      .addReg(DstReg);
+
+  Upper8_15.setMIFlags(MIFlags);
+  Upper0_7.setMIFlags(MIFlags);
+  Lower8_15.setMIFlags(MIFlags);
+  Lower0_7.setMIFlags(MIFlags);
+
+  switch (MO.getType()) {
+  case MachineOperand::MO_Immediate: {
+    unsigned Imm = MO.getImm();
+    unsigned Hi8_15 = (Imm >> 24) & 0xff;
+    unsigned Hi0_7 = (Imm >> 16) & 0xff;
+    unsigned Lo8_15 = (Imm >> 8) & 0xff;
+    unsigned Lo0_7 = Imm & 0xff;
+    Upper8_15 = Upper8_15.addImm(Hi8_15);
+    Upper0_7 = Upper0_7.addImm(Hi0_7);
+    Lower8_15 = Lower8_15.addImm(Lo8_15);
+    Lower0_7 = Lower0_7.addImm(Lo0_7);
+    break;
+  }
+  case MachineOperand::MO_ExternalSymbol: {
+    const char *ES = MO.getSymbolName();
+    unsigned TF = MO.getTargetFlags();
+    Upper8_15 = Upper8_15.addExternalSymbol(ES, TF | ARMII::MO_HI_8_15);
+    Upper0_7 = Upper0_7.addExternalSymbol(ES, TF | ARMII::MO_HI_0_7);
+    Lower8_15 = Lower8_15.addExternalSymbol(ES, TF | ARMII::MO_LO_8_15);
+    Lower0_7 = Lower0_7.addExternalSymbol(ES, TF | ARMII::MO_LO_0_7);
+    break;
+  }
+  default: {
+    const GlobalValue *GV = MO.getGlobal();
+    unsigned TF = MO.getTargetFlags();
+    Upper8_15 = Upper8_15.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI_8_15);
+    Upper0_7 = Upper0_7.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI_0_7);
+    Lower8_15 = Lower8_15.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO_8_15);
+    Lower0_7 = Lower0_7.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO_0_7);
+    break;
+  }
+  }
+
+  Upper8_15 = Upper8_15.add(predOps(ARMCC::AL));
+  Upper0_7 = Upper0_7.add(predOps(ARMCC::AL));
+  Lower8_15 = Lower8_15.add(predOps(ARMCC::AL));
+  Lower0_7 = Lower0_7.add(predOps(ARMCC::AL));
+
+  MI.eraseFromParent();
+  LLVM_DEBUG(dbgs() << "To: "; Upper8_15.getInstr()->dump(););
+  LLVM_DEBUG(dbgs() << "And: "; LSL_U8_15.getInstr()->dump(););
+  LLVM_DEBUG(dbgs() << "And: "; Upper0_7.getInstr()->dump(););
+  LLVM_DEBUG(dbgs() << "And: "; LSL_U0_7->dump(););
+  LLVM_DEBUG(dbgs() << "And: "; Lower8_15.getInstr()->dump(););
+  LLVM_DEBUG(dbgs() << "And: "; LSL_L8_15->dump(););
+  LLVM_DEBUG(dbgs() << "And: "; Lower0_7.getInstr()->dump(););
+}
+
 void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator &MBBI) {
   MachineInstr &MI = *MBBI;
@@ -2658,6 +2753,10 @@
     ExpandMOV32BitImm(MBB, MBBI);
     return true;
 
+  case ARM::t1MOVi32imm:
+    ExpandT1MOV32BitImm(MBB, MBBI);
+    return true;
+
   case ARM::SUBS_PC_LR: {
     MachineInstrBuilder MIB =
         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -550,6 +550,7 @@
     if (IsPositionIndependent)
       Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
     else
+      // TODO: handle t1MOVi32imm, if only as not to break
       Opc = isThumb2 ? 
ARM::t2MOVi32imm : ARM::MOVi32imm; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF)); diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -3087,6 +3087,7 @@ BuildMI(McrMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0) .addImm(AlignedStackSize); } else { + // TODO: Looks like we need tMOVi32imm here when building for XO auto MBBI = McrMBB->end(); auto RegInfo = STI.getRegisterInfo(); RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0, @@ -3192,6 +3193,7 @@ BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0) .addImm(AlignedStackSize); } else { + // Looks like we need tMOVi32imm here when building for XO auto MBBI = AllocMBB->end(); auto RegInfo = STI.getRegisterInfo(); RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0, @@ -3225,6 +3227,7 @@ BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg1) .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())); } else { + // Looks like we need tMOVi32imm here when building for XO auto MBBI = AllocMBB->end(); auto RegInfo = STI.getRegisterInfo(); RegInfo->emitLoadConstPool( diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3701,7 +3701,8 @@ case ISD::Constant: { unsigned Val = cast(N)->getZExtValue(); // If we can't materialize the constant we need to use a literal pool - if (ConstantMaterializationCost(Val, Subtarget) > 2) { + if (ConstantMaterializationCost(Val, Subtarget) > 2 && + !Subtarget->genExecuteOnly()) { SDValue CPIdx = CurDAG->getTargetConstantPool( ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), TLI->getPointerTy(CurDAG->getDataLayout())); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp 
b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2663,6 +2663,7 @@
     // When generating execute-only code we use movw movt pair.
     // Currently execute-only is only available for architectures that
     // support movw movt, so we are safe to assume that.
+    // TODO: make v6-M XO friendly:
     if (Subtarget->genExecuteOnly()) {
       assert(Subtarget->useMovt() &&
              "long-calls with execute-only requires movt and movw!");
@@ -3937,6 +3938,7 @@
   } else if (Subtarget->isRWPI() && !IsRO) {
     // SB-relative.
     SDValue RelAddr;
+    // TODO: make v6-M XO friendly
     if (Subtarget->useMovt()) {
       ++NumMovwMovt;
       SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
@@ -3956,8 +3958,10 @@
   }
 
   // If we have T2 ops, we can materialize the address directly via movt/movw
-  // pair. This is always cheaper.
-  if (Subtarget->useMovt()) {
+  // pair. This is always cheaper. If we need to generate Execute Only code, and we
+  // only have Thumb1 available, we can't use a constant pool and are forced to
+  // use immediate relocations.
+  if (Subtarget->useMovt() || Subtarget->genT1ExecuteOnly()) {
     ++NumMovwMovt;
     // FIXME: Once remat is capable of dealing with instructions with register
     // operands, expand this into two nodes.
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -1603,7 +1603,23 @@
                     IIC_iLoad_i,
                     [(set tGPR:$dst, (ARMWrapper tglobaladdr:$src))]>,
-                    Requires<[IsThumb, DontUseMovt]>;
+                    // TODO: we need a requires that differentiates between xo and non-xo on armv6
+                    Requires<[IsThumb, DontUseMovt, DontGenT1ExecuteOnly]>;
+
+// 32-bit immediate using mov/add with the 4 :lower0_7: to :upper8_15:
+// relocations.
+// This is a single pseudo instruction to make it re-materializable.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1, Size = 16, hasNoSchedulingInfo = 1 in +def t1MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), NoItinerary, + [(set rGPR:$dst, (i32 imm:$src))]>, + Requires<[GenT1ExecuteOnly]>; + +def : ARMPat<(ARMWrapper tglobaladdr :$dst), (t1MOVi32imm tglobaladdr :$dst)>, + Requires<[GenT1ExecuteOnly]>; +def : ARMPat<(ARMWrapper texternalsym :$dst), (t1MOVi32imm texternalsym :$dst)>, + Requires<[GenT1ExecuteOnly]>; + // TLS globals def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), @@ -1613,7 +1629,6 @@ (tLDRLIT_ga_abs tglobaltlsaddr:$addr)>, Requires<[IsThumb, DontUseMovt]>; - // JumpTable def : T1Pat<(ARMWrapperJT tjumptable:$dst), (tLEApcrelJT tjumptable:$dst)>; diff --git a/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/llvm/lib/Target/ARM/ARMMCInstLower.cpp --- a/llvm/lib/Target/ARM/ARMMCInstLower.cpp +++ b/llvm/lib/Target/ARM/ARMMCInstLower.cpp @@ -58,6 +58,26 @@ MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext); Expr = ARMMCExpr::createUpper16(Expr, OutContext); break; + case ARMII::MO_LO_0_7: + Expr = + MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext); + Expr = ARMMCExpr::createLower0_7(Expr, OutContext); + break; + case ARMII::MO_LO_8_15: + Expr = + MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext); + Expr = ARMMCExpr::createLower8_15(Expr, OutContext); + break; + case ARMII::MO_HI_0_7: + Expr = + MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext); + Expr = ARMMCExpr::createUpper0_7(Expr, OutContext); + break; + case ARMII::MO_HI_8_15: + Expr = + MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext); + Expr = ARMMCExpr::createUpper8_15(Expr, OutContext); + break; } if (!MO.isJTI() && MO.getOffset()) diff --git a/llvm/lib/Target/ARM/ARMPredicates.td b/llvm/lib/Target/ARM/ARMPredicates.td --- a/llvm/lib/Target/ARM/ARMPredicates.td +++ b/llvm/lib/Target/ARM/ARMPredicates.td @@ -222,6 +222,8 @@ } def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">; +def GenT1ExecuteOnly : 
Predicate<"Subtarget->genT1ExecuteOnly()">; +def DontGenT1ExecuteOnly : Predicate<"!Subtarget->genT1ExecuteOnly()">; // Armv8.5-A extensions def HasSB : Predicate<"Subtarget->hasSB()">, diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -451,6 +451,8 @@ bool useMovt() const; + bool genT1ExecuteOnly() const; + bool supportsTailCall() const { return SupportsTailCall; } bool allowsUnalignedMem() const { return !StrictAlign; } diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -187,10 +187,12 @@ // Assert this for now to make the change obvious. assert(hasV6T2Ops() || !hasThumb2()); - // Execute only support requires movt support if (genExecuteOnly()) { - NoMovt = false; - assert(hasV8MBaselineOps() && "Cannot generate execute-only code for this target"); + // Execute only support for >= v8-M Baseline requires movt support + if (hasV8MBaselineOps()) + NoMovt = false; + if (!hasV6MOps()) + report_fatal_error("Cannot generate execute-only code for this target"); } // Keep a pointer to static instruction cost data for the specified CPU. @@ -434,6 +436,10 @@ (isTargetWindows() || !OptMinSize || genExecuteOnly()); } +bool ARMSubtarget::genT1ExecuteOnly() const { + return genExecuteOnly() && isThumb1Only(); +} + bool ARMSubtarget::useFastISel() const { // Enable fast-isel for any target, for testing only. if (ForceFastISel) diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -255,7 +255,7 @@ /// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects /// just that part of the flag set. 
-    MO_OPTION_MASK = 0x3,
+    MO_OPTION_MASK = 0xf03,
 
     /// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the
     /// reference is actually to the ".refptr.FOO" symbol. This is used for
@@ -287,11 +287,25 @@
     /// example).
     MO_NONLAZY = 0x80,
 
-    // It's undefined behaviour if an enum overflows the range between its
-    // smallest and largest values, but since these are |ed together, it can
-    // happen. Put a sentinel in (values of this enum are stored as "unsigned
-    // char").
-    MO_UNUSED_MAXIMUM = 0xff
+    /// MO_LO_0_7 - On a symbol operand, this represents a relocation containing
+    /// bits 0 through 7 of the address. Used only with Thumb1 MOV and ADD
+    /// instructions.
+    MO_LO_0_7 = 0x100,
+
+    /// MO_LO_8_15 - On a symbol operand, this represents a relocation containing
+    /// bits 8 through 15 of the address. Used only with Thumb1 MOV and ADD
+    /// instructions.
+    MO_LO_8_15 = 0x200,
+
+    /// MO_HI_0_7 - On a symbol operand, this represents a relocation containing
+    /// bits 16 through 23 of the address. Used only with Thumb1 MOV and ADD
+    /// instructions.
+    MO_HI_0_7 = 0x400,
+
+    /// MO_HI_8_15 - On a symbol operand, this represents a relocation containing
+    /// bits 24 through 31 of the address. Used only with Thumb1 MOV and ADD
+    /// instructions.
+ MO_HI_8_15 = 0x800 }; enum { diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -81,6 +81,7 @@ MachineFunction &MF = *MBB.getParent(); const ARMSubtarget &ST = MF.getSubtarget(); if (ST.genExecuteOnly()) { + // TODO: seems like this should use tMOVi32imm for t1 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ScratchReg) .addImm(NumBytes).setMIFlags(MIFlags); } else { diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp --- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -143,6 +143,7 @@ if (TM.isPositionIndependent()) expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::tLDRi); else + // TODO: should add tMOVi32imm if XO expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_abs, ARM::tLDRi); } diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp --- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -159,6 +159,7 @@ .addReg(LdReg, RegState::Kill) .setMIFlags(MIFlags); } else if (ST.genExecuteOnly()) { + // TODO: looks like for t1, we need to check and generate tMOVi32imm BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), LdReg) .addImm(NumBytes).setMIFlags(MIFlags); } else