diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1171,11 +1171,15 @@ case ARM::ADDri: case ARM::t2ADDri: case ARM::t2ADDri12: + case ARM::t2ADDspImm: + case ARM::t2ADDspImm12: Offset = -MI->getOperand(2).getImm(); break; case ARM::SUBri: case ARM::t2SUBri: case ARM::t2SUBri12: + case ARM::t2SUBspImm: + case ARM::t2SUBspImm12: Offset = MI->getOperand(2).getImm(); break; case ARM::tSUBspi: diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -3257,22 +3257,26 @@ } break; case ARM::t2ADDrr: - case ARM::t2SUBrr: + case ARM::t2SUBrr: { if (UseOpc == ARM::t2SUBrr && Commute) return false; // ADD/SUB are special because they're essentially the same operation, so // we can handle a larger range of immediates. + const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP; + const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri; + const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri; if (ARM_AM::isT2SOImmTwoPartVal(ImmVal)) - NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2ADDri : ARM::t2SUBri; + NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB; else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) { ImmVal = -ImmVal; - NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri : ARM::t2ADDri; + NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD; } else return false; SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); break; + } case ARM::t2ORRrr: case ARM::t2EORrr: if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal)) @@ -3292,7 +3296,8 @@ unsigned OpIdx = Commute ? 2 : 1; Register Reg1 = UseMI.getOperand(OpIdx).getReg(); bool isKill = UseMI.getOperand(OpIdx).isKill(); - Register NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); + const TargetRegisterClass *TRC = MRI->getRegClass(Reg); + Register NewReg = MRI->createVirtualRegister(TRC); BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc), NewReg) .addReg(Reg1, getKillRegState(isKill)) @@ -3304,6 +3309,18 @@ UseMI.getOperand(1).setIsKill(); UseMI.getOperand(2).ChangeToImmediate(SOImmValV2); DefMI.eraseFromParent(); + // FIXME: t2ADDrr should be split, as different rulles apply when writing to SP. + // Just as t2ADDri, that was split to [t2ADDri, t2ADDspImm]. + // Then the below code will not be needed, as the input/output register + // classes will be rgpr or gprSP. + // For now, we fix the UseMI operand explicitly here: + switch(NewUseOpc){ + case ARM::t2ADDspImm: + case ARM::t2SUBspImm: + case ARM::t2ADDri: + case ARM::t2SUBri: + MRI->setRegClass(UseMI.getOperand(0).getReg(), TRC); + } return true; } diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -918,10 +918,26 @@ // The register-immediate version is re-materializable. This is useful // in particular for taking the address of a local. let isReMaterializable = 1 in { + def spImm : T2sTwoRegImm< + (outs GPRsp:$Rd), (ins GPRsp:$Rn, t2_so_imm:$imm), IIC_iALUi, + opc, ".w\t$Rd, $Rn, $imm", + []>, + Sched<[WriteALU, ReadALU]> { + let Rn = 13; + let Rd = 13; + + let Inst{31-27} = 0b11110; + let Inst{25-24} = 0b01; + let Inst{23-21} = op23_21; + let Inst{15} = 0; + + let DecoderMethod = "DecodeT2AddSubSPImm"; + } + def ri : T2sTwoRegImm< - (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iALUi, + (outs rGPR:$Rd), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iALUi, opc, ".w\t$Rd, $Rn, $imm", - [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]>, + [(set rGPR:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]>, Sched<[WriteALU, ReadALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; @@ -932,9 +948,9 @@ } // 12-bit imm def ri12 : T2I< - (outs GPRnopc:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi, + (outs rGPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi, !strconcat(opc, "w"), "\t$Rd, $Rn, $imm", - [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]>, + [(set rGPR:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]>, Sched<[WriteALU, ReadALU]> { bits<4> Rd; bits<4> Rn; @@ -950,6 +966,26 @@ let Inst{11-8} = Rd; let Inst{7-0} = imm{7-0}; } + def spImm12 : T2I< + (outs GPRsp:$Rd), (ins GPRsp:$Rn, imm0_4095:$imm), IIC_iALUi, + !strconcat(opc, "w"), "\t$Rd, $Rn, $imm", + []>, + Sched<[WriteALU, ReadALU]> { + bits<4> Rd = 13; + bits<4> Rn = 13; + bits<12> imm; + let Inst{31-27} = 0b11110; + let Inst{26} = imm{11}; + let Inst{25-24} = 0b10; + let Inst{23-21} = op23_21; + let Inst{20} = 0; // The S bit. + let Inst{19-16} = Rn; + let Inst{15} = 0; + let Inst{14-12} = imm{10-8}; + let Inst{11-8} = Rd; + let Inst{7-0} = imm{7-0}; + let DecoderMethod = "DecodeT2AddSubSPImm"; + } // register def rr : T2sThreeReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm), IIC_iALUr, opc, ".w\t$Rd, $Rn, $Rm", @@ -2267,19 +2303,29 @@ (t2ADCri rGPR:$rd, rGPR:$rn, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>; def : t2InstSubst<"add${s}${p}.w $rd, $rn, $imm", - (t2SUBri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; -def : t2InstSubst<"addw${p} $rd, $rn, $imm", - (t2SUBri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>; + (t2SUBri rGPR:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; def : t2InstSubst<"sub${s}${p}.w $rd, $rn, $imm", - (t2ADDri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; -def : t2InstSubst<"subw${p} $rd, $rn, $imm", - (t2ADDri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>; + (t2ADDri rGPR:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; def : t2InstSubst<"subw${p} $Rd, $Rn, $imm", - (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; + (t2ADDri12 rGPR:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; def : t2InstSubst<"sub${s}${p} $rd, $rn, $imm", - (t2ADDri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; + (t2ADDri rGPR:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; def : t2InstSubst<"sub${p} $rd, $rn, $imm", - (t2ADDri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>; + (t2ADDri12 rGPR:$rd, GPR:$rn, imm0_4095_neg:$imm, pred:$p)>; + +// SP to SP alike +def : t2InstSubst<"add${s}${p}.w $rd, $rn, $imm", + (t2SUBspImm GPRsp:$rd, GPRsp:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; +def : t2InstSubst<"sub${s}${p}.w $rd, $rn, $imm", + (t2ADDspImm GPRsp:$rd, GPRsp:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; +def : t2InstSubst<"subw${p} $Rd, $Rn, $imm", + (t2ADDspImm12 GPRsp:$Rd, GPRsp:$Rn, imm0_4095_neg:$imm, pred:$p)>; +def : t2InstSubst<"sub${s}${p} $rd, $rn, $imm", + (t2ADDspImm GPRsp:$rd, GPRsp:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; +def : t2InstSubst<"sub${p} $rd, $rn, $imm", + (t2ADDspImm12 GPRsp:$rd, GPRsp:$rn, imm0_4095_neg:$imm, pred:$p)>; + + // RSB defm t2RSB : T2I_rbin_irs <0b1110, "rsb", sub>; @@ -2295,12 +2341,12 @@ // The AddedComplexity preferences the first variant over the others since // it can be shrunk to a 16-bit wide encoding, while the others cannot. let AddedComplexity = 1 in -def : T2Pat<(add GPR:$src, imm1_255_neg:$imm), - (t2SUBri GPR:$src, imm1_255_neg:$imm)>; -def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), - (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>; -def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), - (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>; +def : T2Pat<(add rGPR:$src, imm1_255_neg:$imm), + (t2SUBri rGPR:$src, imm1_255_neg:$imm)>; +def : T2Pat<(add rGPR:$src, t2_so_imm_neg:$imm), + (t2SUBri rGPR:$src, t2_so_imm_neg:$imm)>; +def : T2Pat<(add rGPR:$src, imm0_4095_neg:$imm), + (t2SUBri12 rGPR:$src, imm0_4095_neg:$imm)>; def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm), (t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>; @@ -2799,10 +2845,10 @@ // Thumb2SizeReduction's chances later on we select a t2ADD for an or where // possible. def : T2Pat<(or AddLikeOrOp:$Rn, t2_so_imm:$imm), - (t2ADDri $Rn, t2_so_imm:$imm)>; + (t2ADDri rGPR:$Rn, t2_so_imm:$imm)>; def : T2Pat<(or AddLikeOrOp:$Rn, imm0_4095:$Rm), - (t2ADDri12 $Rn, imm0_4095:$Rm)>; + (t2ADDri12 rGPR:$Rn, imm0_4095:$Rm)>; def : T2Pat<(or AddLikeOrOp:$Rn, non_imm32:$Rm), (t2ADDrr $Rn, $Rm)>; @@ -4666,10 +4712,10 @@ // Aliases for ADD without the ".w" optional width specifier. def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", - (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, + (t2ADDri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"add${p} $Rd, $Rn, $imm", - (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; + (t2ADDri12 rGPR:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm", (t2ADDrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"add${s}${p} $Rd, $Rn, $ShiftedRm", @@ -4677,9 +4723,11 @@ pred:$p, cc_out:$s)>; // ... and with the destination and source register combined. def : t2InstAlias<"add${s}${p} $Rdn, $imm", - (t2ADDri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; + (t2ADDri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"add${p} $Rdn, $imm", - (t2ADDri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>; + (t2ADDri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"addw${p} $Rdn, $imm", + (t2ADDri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095:$imm, pred:$p)>; def : t2InstAlias<"add${s}${p} $Rdn, $Rm", (t2ADDrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm", @@ -4688,33 +4736,33 @@ // add w/ negative immediates is just a sub. def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm", - (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, + (t2SUBri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; def : t2InstSubst<"add${p} $Rd, $Rn, $imm", - (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; + (t2SUBri12 rGPR:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; def : t2InstSubst<"add${s}${p} $Rdn, $imm", - (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p, + (t2SUBri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; def : t2InstSubst<"add${p} $Rdn, $imm", - (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>; + (t2SUBri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095_neg:$imm, pred:$p)>; def : t2InstSubst<"add${s}${p}.w $Rd, $Rn, $imm", - (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, + (t2SUBri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; def : t2InstSubst<"addw${p} $Rd, $Rn, $imm", - (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; + (t2SUBri12 rGPR:$Rd, rGPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; def : t2InstSubst<"add${s}${p}.w $Rdn, $imm", - (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p, + (t2SUBri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; def : t2InstSubst<"addw${p} $Rdn, $imm", - (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>; + (t2SUBri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095_neg:$imm, pred:$p)>; // Aliases for SUB without the ".w" optional width specifier. def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm", - (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; + (t2SUBri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${p} $Rd, $Rn, $imm", - (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; + (t2SUBri12 rGPR:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $Rm", (t2SUBrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $ShiftedRm", @@ -4722,9 +4770,11 @@ pred:$p, cc_out:$s)>; // ... and with the destination and source register combined. def : t2InstAlias<"sub${s}${p} $Rdn, $imm", - (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; + (t2SUBri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${p} $Rdn, $imm", - (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>; + (t2SUBri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"subw${p} $Rdn, $imm", + (t2SUBri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095:$imm, pred:$p)>; def : t2InstAlias<"sub${s}${p}.w $Rdn, $Rm", (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${s}${p} $Rdn, $Rm", @@ -4733,6 +4783,65 @@ (t2SUBrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>; +// SP to SP alike aliases +// Aliases for ADD without the ".w" optional width specifier. +def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", + (t2ADDspImm GPRsp:$Rd, GPRsp:$Rn, t2_so_imm:$imm, pred:$p, + cc_out:$s)>; +def : t2InstAlias<"add${p} $Rd, $Rn, $imm", + (t2ADDspImm12 GPRsp:$Rd, GPRsp:$Rn, imm0_4095:$imm, pred:$p)>; +// ... and with the destination and source register combined. +def : t2InstAlias<"add${s}${p} $Rdn, $imm", + (t2ADDspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; + +def : t2InstAlias<"add${s}${p}.w $Rdn, $imm", + (t2ADDspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; + +def : t2InstAlias<"add${p} $Rdn, $imm", + (t2ADDspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095:$imm, pred:$p)>; + +def : t2InstAlias<"addw${p} $Rdn, $imm", + (t2ADDspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095:$imm, pred:$p)>; + +// add w/ negative immediates is just a sub. +def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm", + (t2SUBspImm GPRsp:$Rd, GPRsp:$Rn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstSubst<"add${p} $Rd, $Rn, $imm", + (t2SUBspImm12 GPRsp:$Rd, GPRsp:$Rn, imm0_4095_neg:$imm, pred:$p)>; +def : t2InstSubst<"add${s}${p} $Rdn, $imm", + (t2SUBspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstSubst<"add${p} $Rdn, $imm", + (t2SUBspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095_neg:$imm, pred:$p)>; + +def : t2InstSubst<"add${s}${p}.w $Rd, $Rn, $imm", + (t2SUBspImm GPRsp:$Rd, GPRsp:$Rn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstSubst<"addw${p} $Rd, $Rn, $imm", + (t2SUBspImm12 GPRsp:$Rd, GPRsp:$Rn, imm0_4095_neg:$imm, pred:$p)>; +def : t2InstSubst<"add${s}${p}.w $Rdn, $imm", + (t2SUBspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstSubst<"addw${p} $Rdn, $imm", + (t2SUBspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095_neg:$imm, pred:$p)>; + + +// Aliases for SUB without the ".w" optional width specifier. +def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm", + (t2SUBspImm GPRsp:$Rd, GPRsp:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"sub${p} $Rd, $Rn, $imm", + (t2SUBspImm12 GPRsp:$Rd, GPRsp:$Rn, imm0_4095:$imm, pred:$p)>; +// ... and with the destination and source register combined. +def : t2InstAlias<"sub${s}${p} $Rdn, $imm", + (t2SUBspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"sub${s}${p}.w $Rdn, $imm", + (t2SUBspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"sub${p} $Rdn, $imm", + (t2SUBspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"subw${p} $Rdn, $imm", + (t2SUBspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095:$imm, pred:$p)>; + // Alias for compares without the ".w" optional width specifier. def : t2InstAlias<"cmn${p} $Rn, $Rm", (t2CMNzrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>; @@ -4989,10 +5098,16 @@ pred:$p, cc_out:$s)>; // Likewise, "add Rd, t2_so_imm_neg" -> sub def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm", - (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, + (t2SUBri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, + pred:$p, cc_out:$s)>; +def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm", + (t2SUBspImm GPRsp:$Rd, GPRsp:$Rn, t2_so_imm_neg:$imm, + pred:$p, cc_out:$s)>; +def : t2InstSubst<"add${s}${p} $Rd, $imm", + (t2SUBri rGPR:$Rd, rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; def : t2InstSubst<"add${s}${p} $Rd, $imm", - (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm, + (t2SUBspImm GPRsp:$Rd, GPRsp:$Rd, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; // Same for CMP <--> CMN via t2_so_imm_neg def : t2InstSubst<"cmp${p} $Rd, $imm", diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -696,18 +696,23 @@ return nullptr; } - int BaseOpc = - isThumb2 ? ARM::t2ADDri : - (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi : - (isThumb1 && Offset < 8) ? ARM::tADDi3 : - isThumb1 ? ARM::tADDi8 : ARM::ADDri; + int BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2ADDspImm + : ARM::t2ADDri) + : (isThumb1 && Base == ARM::SP) + ? ARM::tADDrSPi + : (isThumb1 && Offset < 8) + ? ARM::tADDi3 + : isThumb1 ? ARM::tADDi8 : ARM::ADDri; if (Offset < 0) { - Offset = - Offset; - BaseOpc = - isThumb2 ? ARM::t2SUBri : - (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 : - isThumb1 ? ARM::tSUBi8 : ARM::SUBri; + // FIXME: There are no Thumb1 load/store instructions with negative + // offsets. So the Base != ARM::SP might be unnecessary. + Offset = -Offset; + BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2SUBspImm + : ARM::t2SUBri) + : (isThumb1 && Offset < 8 && Base != ARM::SP) + ? ARM::tSUBi3 + : isThumb1 ? ARM::tSUBi8 : ARM::SUBri; } if (!TL->isLegalAddImmediate(Offset)) @@ -1186,8 +1191,10 @@ case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break; case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break; case ARM::t2SUBri: + case ARM::t2SUBspImm: case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break; case ARM::t2ADDri: + case ARM::t2ADDspImm: case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break; case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break; case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break; diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -6554,7 +6554,8 @@ // Check against T3. If the second register is the PC, this is an // alternate form of ADR, which uses encoding T4, so check for that too. if (static_cast(*Operands[4]).getReg() != ARM::PC && - static_cast(*Operands[5]).isT2SOImm()) + (static_cast(*Operands[5]).isT2SOImm() || + static_cast(*Operands[5]).isT2SOImmNeg())) return false; // Otherwise, we use encoding T4, which does not have a cc_out @@ -6609,9 +6610,34 @@ static_cast(*Operands[1]).getReg() == 0 && (static_cast(*Operands[4]).isImm() || (Operands.size() == 6 && - static_cast(*Operands[5]).isImm()))) - return true; - + static_cast(*Operands[5]).isImm()))) { + // Thumb2 (add|sub){s}{p}.w GPRnopc, sp, #{T2SOImm} has cc_out + return (!(isThumbTwo() && + (static_cast(*Operands[4]).isT2SOImm() || + static_cast(*Operands[4]).isT2SOImmNeg()))); + } + // Fixme: Should join all the thumb+thumb2 (add|sub) in a single if case + // Thumb2 ADD r0, #4095 -> ADDW r0, r0, #4095 (T4) + // Thumb2 SUB r0, #4095 -> SUBW r0, r0, #4095 + if (isThumbTwo() && (Mnemonic == "add" || Mnemonic == "sub") && + (Operands.size() == 5) && + static_cast(*Operands[3]).isReg() && + static_cast(*Operands[3]).getReg() != ARM::SP && + static_cast(*Operands[3]).getReg() != ARM::PC && + static_cast(*Operands[1]).getReg() == 0 && + static_cast(*Operands[4]).isImm()) { + const ARMOperand &IMM = static_cast(*Operands[4]); + if (IMM.isT2SOImm() || IMM.isT2SOImmNeg()) + return false; // add.w / sub.w + if (const MCConstantExpr *CE = dyn_cast(IMM.getImm())) { + const int64_t Value = CE->getValue(); + // Thumb1 imm8 sub / add + if ((Value < ((1 << 7) - 1) << 2) && inITBlock() && (!(Value & 3)) && + isARMLowRegister(static_cast(*Operands[3]).getReg())) + return false; + return true; // Thumb2 T4 addw / subw + } + } return false; } @@ -7707,12 +7733,8 @@ } break; - case ARM::t2ADDri: - case ARM::t2ADDri12: case ARM::t2ADDrr: case ARM::t2ADDrs: - case ARM::t2SUBri: - case ARM::t2SUBri12: case ARM::t2SUBrr: case ARM::t2SUBrs: if (Inst.getOperand(0).getReg() == ARM::SP && @@ -9750,23 +9772,33 @@ } break; case ARM::t2ADDri12: - // If the immediate fits for encoding T3 (t2ADDri) and the generic "add" - // mnemonic was used (not "addw"), encoding T3 is preferred. - if (static_cast(*Operands[0]).getToken() != "add" || - ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1) - break; - Inst.setOpcode(ARM::t2ADDri); - Inst.addOperand(MCOperand::createReg(0)); // cc_out - break; case ARM::t2SUBri12: - // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub" - // mnemonic was used (not "subw"), encoding T3 is preferred. - if (static_cast(*Operands[0]).getToken() != "sub" || + case ARM::t2ADDspImm12: + case ARM::t2SUBspImm12: { + // If the immediate fits for encoding T3 and the generic + // mnemonic was used, encoding T3 is preferred. + const StringRef Token = static_cast(*Operands[0]).getToken(); + if ((Token != "add" && Token != "sub") || ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1) break; - Inst.setOpcode(ARM::t2SUBri); + switch (Inst.getOpcode()) { + case ARM::t2ADDri12: + Inst.setOpcode(ARM::t2ADDri); + break; + case ARM::t2SUBri12: + Inst.setOpcode(ARM::t2SUBri); + break; + case ARM::t2ADDspImm12: + Inst.setOpcode(ARM::t2ADDspImm); + break; + case ARM::t2SUBspImm12: + Inst.setOpcode(ARM::t2SUBspImm); + break; + } + Inst.addOperand(MCOperand::createReg(0)); // cc_out - break; + return true; + } case ARM::tADDi8: // If the immediate is in the range 0-7, we want tADDi3 iff Rd was // explicitly specified. From the ARM ARM: "Encoding T1 is preferred @@ -9812,6 +9844,25 @@ Inst = TmpInst; return true; } + case ARM::t2ADDspImm: + case ARM::t2SUBspImm: { + // Prefer T1 encoding if possible + if (Inst.getOperand(5).getReg() != 0 || HasWideQualifier) + break; + unsigned V = Inst.getOperand(2).getImm(); + if (V & 3 || V > ((1 << 7) - 1) << 2) + break; + MCInst TmpInst; + TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDspImm ? ARM::tADDspi + : ARM::tSUBspi); + TmpInst.addOperand(MCOperand::createReg(ARM::SP)); // destination reg + TmpInst.addOperand(MCOperand::createReg(ARM::SP)); // source reg + TmpInst.addOperand(MCOperand::createImm(V / 4)); // immediate + TmpInst.addOperand(Inst.getOperand(3)); // pred + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } case ARM::t2ADDrr: { // If the destination and first source operand are the same, and // there's no setting of the flags, use encoding T2 instead of T3. diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp --- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -201,6 +201,9 @@ uint64_t Address, const void *Decoder); static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, @@ -563,6 +566,9 @@ static DecodeStatus DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + #include "ARMGenDisassemblerTables.inc" static MCDisassembler *createARMDisassembler(const Target &T, @@ -1225,6 +1231,17 @@ return S; } +static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo != 13) + return MCDisassembler::Fail; + + unsigned Register = GPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { unsigned Register = 0; @@ -5582,14 +5599,25 @@ unsigned sign1 = fieldFromInstruction(Insn, 21, 1); unsigned sign2 = fieldFromInstruction(Insn, 23, 1); if (sign1 != sign2) return MCDisassembler::Fail; + const unsigned Rd = fieldFromInstruction(Insn, 8, 4); + assert(Inst.getNumOperands() == 0 && "We should receive an empty Inst"); + DecodeStatus S = DecoderGPRRegisterClass(Inst, Rd, Address, Decoder); unsigned Val = fieldFromInstruction(Insn, 0, 8); Val |= fieldFromInstruction(Insn, 12, 3) << 8; Val |= fieldFromInstruction(Insn, 26, 1) << 11; - Val |= sign1 << 12; - Inst.addOperand(MCOperand::createImm(SignExtend32<13>(Val))); - - return MCDisassembler::Success; + // If sign, then it is decreasing the address. + if (sign1) { + // Following ARMv7 Architecture Manual, when the offset + // is zero, it is decoded as a subw, not as a adr.w + if (!Val) { + Inst.setOpcode(ARM::t2SUBri12); + Inst.addOperand(MCOperand::createReg(ARM::PC)); + } else + Val = -Val; + } + Inst.addOperand(MCOperand::createImm(Val)); + return S; } static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val, @@ -6589,3 +6617,40 @@ Inst.addOperand(MCOperand::createReg(ARM::VPR)); return S; } + +static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + const unsigned Rd = fieldFromInstruction(Insn, 8, 4); + const unsigned Rn = fieldFromInstruction(Insn, 16, 4); + const unsigned Imm12 = fieldFromInstruction(Insn, 26, 1) << 11 | + fieldFromInstruction(Insn, 12, 3) << 8 | + fieldFromInstruction(Insn, 0, 8); + const unsigned TypeT3 = fieldFromInstruction(Insn, 25, 1); + unsigned sign1 = fieldFromInstruction(Insn, 21, 1); + unsigned sign2 = fieldFromInstruction(Insn, 23, 1); + unsigned S = fieldFromInstruction(Insn, 20, 1); + if (sign1 != sign2) + return MCDisassembler::Fail; + + // T3 does a zext of imm12, where T2 does a ThumbExpandImm (T2SOImm) + DecodeStatus DS = MCDisassembler::Success; + if ((!Check(DS, + DecodeGPRspRegisterClass(Inst, Rd, Address, Decoder))) || // dst + (!Check(DS, DecodeGPRspRegisterClass(Inst, Rn, Address, Decoder)))) + return MCDisassembler::Fail; + if (TypeT3) { + Inst.setOpcode(sign1 ? ARM::t2SUBspImm12 : ARM::t2ADDspImm12); + S = 0; + Inst.addOperand(MCOperand::createImm(Imm12)); // zext imm12 + } else { + Inst.setOpcode(sign1 ? ARM::t2SUBspImm : ARM::t2ADDspImm); + if (!Check(DS, DecodeT2SOImm(Inst, Imm12, Address, Decoder))) // imm12 + return MCDisassembler::Fail; + } + if (!Check(DS, DecodeCCOutOperand(Inst, S, Address, Decoder))) // cc_out + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::createReg(0)); // pred + + return DS; +} diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -319,15 +319,19 @@ } bool HasCCOut = true; int ImmIsT2SO = ARM_AM::getT2SOImmVal(ThisVal); - - Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri; + bool ToSP = DestReg == ARM::SP; + unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri; + unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri; + unsigned t2SUBi12 = ToSP ? ARM::t2SUBspImm12 : ARM::t2SUBri12; + unsigned t2ADDi12 = ToSP ? ARM::t2ADDspImm12 : ARM::t2ADDri12; + Opc = isSub ? t2SUB : t2ADD; // Prefer T2: sub rd, rn, so_imm | sub sp, sp, so_imm if (ImmIsT2SO != -1) { NumBytes = 0; } else if (ThisVal < 4096) { // Prefer T3 if can make it in a single go: subw rd, rn, imm12 | subw sp, // sp, imm12 - Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12; + Opc = isSub ? t2SUBi12 : t2ADDi12; HasCCOut = false; NumBytes = 0; } else { @@ -483,7 +487,8 @@ if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR) AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2? - if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) { + const bool IsSP = Opcode == ARM::t2ADDspImm12 || Opcode == ARM::t2ADDspImm; + if (IsSP || Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) { Offset += MI.getOperand(FrameRegIdx+1).getImm(); unsigned PredReg; @@ -500,14 +505,14 @@ return true; } - bool HasCCOut = Opcode != ARM::t2ADDri12; + bool HasCCOut = (Opcode != ARM::t2ADDspImm12 && Opcode != ARM::t2ADDri12); if (Offset < 0) { Offset = -Offset; isSub = true; - MI.setDesc(TII.get(ARM::t2SUBri)); + MI.setDesc(IsSP ? TII.get(ARM::t2SUBspImm) : TII.get(ARM::t2SUBri)); } else { - MI.setDesc(TII.get(ARM::t2ADDri)); + MI.setDesc(IsSP ? TII.get(ARM::t2ADDspImm) : TII.get(ARM::t2ADDri)); } // Common case: small offset, fits into instruction. @@ -523,7 +528,8 @@ // Another common case: imm12. if (Offset < 4096 && (!HasCCOut || MI.getOperand(MI.getNumOperands()-1).getReg() == 0)) { - unsigned NewOpc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12; + unsigned NewOpc = isSub ? IsSP ? ARM::t2SUBspImm12 : ARM::t2SUBri12 + : IsSP ? ARM::t2ADDspImm12 : ARM::t2ADDri12; MI.setDesc(TII.get(NewOpc)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); diff --git a/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-arithmetic-ops.mir b/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-arithmetic-ops.mir --- a/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-arithmetic-ops.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-arithmetic-ops.mir @@ -64,7 +64,7 @@ %1(s32) = G_CONSTANT i32 786444 ; 0x000c000c %2(s32) = G_ADD %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]]:gprnopc = t2ADDri [[VREGX]], 786444, 14, $noreg, $noreg + ; CHECK: [[VREGRES:%[0-9]+]]:rgpr = t2ADDri [[VREGX]], 786444, 14, $noreg, $noreg $r0 = COPY %2(s32) ; CHECK: $r0 = COPY [[VREGRES]] @@ -92,7 +92,7 @@ %1(s32) = G_CONSTANT i32 4093 %2(s32) = G_ADD %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]]:gprnopc = t2ADDri12 [[VREGX]], 4093, 14, $noreg + ; CHECK: [[VREGRES:%[0-9]+]]:rgpr = t2ADDri12 [[VREGX]], 4093, 14, $noreg $r0 = COPY %2(s32) ; CHECK: $r0 = COPY [[VREGRES]] @@ -178,7 +178,7 @@ %1(s32) = G_CONSTANT i32 786444 ; 0x000c000c %2(s32) = G_SUB %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]]:gprnopc = t2SUBri [[VREGX]], 786444, 14, $noreg, $noreg + ; CHECK: [[VREGRES:%[0-9]+]]:rgpr = t2SUBri [[VREGX]], 786444, 14, $noreg, $noreg $r0 = COPY %2(s32) ; CHECK: $r0 = COPY [[VREGRES]] diff --git a/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-load-store.mir b/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-load-store.mir --- a/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-load-store.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-load-store.mir @@ -168,7 +168,7 @@ liveins: $r0, $r1, $r2, $r3 %0(p0) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[FI32VREG:%[0-9]+]]:gprnopc = t2ADDri %fixed-stack.[[FI32]], 0, 14, $noreg, $noreg + ; CHECK: [[FI32VREG:%[0-9]+]]:rgpr = t2ADDri %fixed-stack.[[FI32]], 0, 14, $noreg, $noreg %1(s32) = G_LOAD %0(p0) :: (load 4) ; CHECK: [[LD32VREG:%[0-9]+]]:gpr = t2LDRi12 [[FI32VREG]], 0, 14, $noreg @@ -177,7 +177,7 @@ ; CHECK: $r0 = COPY [[LD32VREG]] %2(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[FI1VREG:%[0-9]+]]:gprnopc = t2ADDri %fixed-stack.[[FI1]], 0, 14, $noreg, $noreg + ; CHECK: [[FI1VREG:%[0-9]+]]:rgpr = t2ADDri %fixed-stack.[[FI1]], 0, 14, $noreg, $noreg %3(s1) = G_LOAD %2(p0) :: (load 1) ; CHECK: [[LD1VREG:%[0-9]+]]:gprnopc = t2LDRBi12 [[FI1VREG]], 0, 14, $noreg diff --git a/llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir b/llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir --- a/llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir +++ b/llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir @@ -1,7 +1,7 @@ --- | ; RUN: llc --run-pass=prologepilog -o - %s | FileCheck %s ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_register $r7 - ; CHECK-NEXT: $sp = frame-setup t2SUBri12 killed $sp, 4008, 14, $noreg + ; CHECK-NEXT: $sp = frame-setup t2SUBspImm12 killed $sp, 4008, 14, $noreg target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv7-none-none-eabi" diff --git a/llvm/test/CodeGen/Thumb2/bug-subw.ll b/llvm/test/CodeGen/Thumb2/bug-subw.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/bug-subw.ll @@ -0,0 +1,74 @@ +; pr23772 - [ARM] r226200 can emit illegal thumb2 instruction: "sub sp, r12, #80" +; RUN: llc -march=thumb -mcpu=cortex-m3 -O3 -filetype=asm -o - %s | FileCheck %s +; CHECK-NOT: sub{{.*}} sp, r{{.*}}, # +; CHECK: .fnend +; TODO: Missed optimization. The three instructions generated to subtract SP can be converged to a single one +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32" +target triple = "thumbv7m-unknown-unknown" +%B = type {%B*} +%R = type {i32} +%U = type {%U*, i8, i8} +%E = type {%B*, %U*} +%X = type {i32, i8, i8} +declare external [0 x i8]* @memalloc(i32, i32, i32) +declare external void @memfree([0 x i8]*, i32, i32) +define void @foo(%B* %pb$, %R* %pr$) nounwind { +L.0: + %pb = alloca %B* + %pr = alloca %R* + store %B* %pb$, %B** %pb + store %R* %pr$, %R** %pr + %pe = alloca %E* + %0 = load %B*, %B** %pb + %1 = bitcast %B* %0 to %E* + store %E* %1, %E** %pe + %2 = load %R*, %R** %pr + %3 = getelementptr %R, %R* %2, i32 0, i32 0 + %4 = load i32, i32* %3 + switch i32 %4, label %L.1 [ + i32 1, label %L.3 + ] +L.3: + %px = alloca %X* + %5 = load %R*, %R** %pr + %6 = bitcast %R* %5 to %X* + store %X* %6, %X** %px + %7 = load %X*, %X** %px + %8 = getelementptr %X, %X* %7, i32 0, i32 0 + %9 = load i32, i32* %8 + %10 = icmp ne i32 %9, 0 + br i1 %10, label %L.5, label %L.4 +L.5: + %pu = alloca %U* + %11 = call [0 x i8]* @memalloc(i32 8, i32 4, i32 0) + %12 = bitcast [0 x i8]* %11 to %U* + store %U* %12, %U** %pu + %13 = load %X*, %X** %px + %14 = getelementptr %X, %X* %13, i32 0, i32 1 + %15 = load i8, i8* %14 + %16 = load %U*, %U** %pu + %17 = getelementptr %U, %U* %16, i32 0, i32 1 + store i8 %15, i8* %17 + %18 = load %E*, %E** %pe + %19 = getelementptr %E, %E* %18, i32 0, i32 1 + %20 = load %U*, %U** %19 + %21 = load %U*, %U** %pu + %22 = getelementptr %U, %U* %21, i32 0, i32 0 + store %U* %20, %U** %22 + %23 = load %U*, %U** %pu + %24 = load %E*, %E** %pe + %25 = getelementptr %E, %E* %24, i32 0, i32 1 + store %U* %23, %U** %25 + br label %L.4 +L.4: + %26 = load %X*, %X** %px + %27 = bitcast %X* %26 to [0 x i8]* + call void @memfree([0 x i8]* %27, i32 8, i32 0) + br label %L.2 +L.1: + br label %L.2 +L.2: + br label %return +return: + ret void +} diff --git a/llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir b/llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir --- a/llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir +++ b/llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir @@ -27,7 +27,7 @@ ; CHECK: frame-setup CFI_INSTRUCTION offset $r6, -28 ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -32 ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -36 - ; CHECK: $sp = frame-setup t2SUBri killed $sp, 1208, 14, $noreg, $noreg + ; CHECK: $sp = frame-setup t2SUBspImm killed $sp, 1208, 14, $noreg, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 1244 ; CHECK: $r0 = IMPLICIT_DEF ; CHECK: $r1 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir b/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir --- a/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir +++ b/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir @@ -118,7 +118,7 @@ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r6, -28 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r5, -32 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r4, -36 - ; CHECK-NEXT: $sp = frame-setup t2SUBri12 killed $sp, 1220, 14, $noreg + ; CHECK-NEXT: $sp = frame-setup t2SUBspImm12 killed $sp, 1220, 14, $noreg ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 1256 ; CHECK-NEXT: $r0 = IMPLICIT_DEF ; CHECK-NEXT: $r1 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/Thumb2/peephole-addsub.mir b/llvm/test/CodeGen/Thumb2/peephole-addsub.mir --- a/llvm/test/CodeGen/Thumb2/peephole-addsub.mir +++ b/llvm/test/CodeGen/Thumb2/peephole-addsub.mir @@ -22,7 +22,7 @@ %0:rgpr = COPY $r0 %2:rgpr = t2MOVi 1, 14, $noreg, $noreg %3:gprnopc = t2ADDrr %0, %1, 14, $noreg, $noreg - %4:gprnopc = t2SUBri %3, 0, 14, $noreg, def dead $cpsr + %4:rgpr = t2SUBri %3, 0, 14, $noreg, def dead $cpsr t2CMPri killed %3, 0, 14, $noreg, implicit-def $cpsr %5:rgpr = t2MOVCCi %2, 0, 7, $cpsr $r0 = COPY %5 @@ -30,6 +30,6 @@ # CHECK-LABEL: name: test # CHECK: %3:gprnopc = t2ADDrr %0, %1, 14, $noreg, $noreg -# CHECK-NEXT: %4:gprnopc = t2SUBri %3, 0, 14, $noreg, def $cpsr +# CHECK-NEXT: %4:rgpr = t2SUBri %3, 0, 14, $noreg, def $cpsr # CHECK-NEXT: %5:rgpr = t2MOVCCi %2, 0, 7, $cpsr ... diff --git a/llvm/test/CodeGen/Thumb2/peephole-cmp.mir b/llvm/test/CodeGen/Thumb2/peephole-cmp.mir --- a/llvm/test/CodeGen/Thumb2/peephole-cmp.mir +++ b/llvm/test/CodeGen/Thumb2/peephole-cmp.mir @@ -23,7 +23,7 @@ liveins: $r0 %0:rgpr = COPY $r0 - %1:gprnopc = t2ADDri %stack.0.f, 0, 14, $noreg, $noreg + %1:rgpr = t2ADDri %stack.0.f, 0, 14, $noreg, $noreg t2CMPrr %1, %0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.2, 3, $cpsr t2B %bb.1, 14, $noreg @@ -37,7 +37,7 @@ tBX_RET 14, $noreg # CHECK-LABEL: name: test_addir_frameindex -# CHECK: %1:gprnopc = t2ADDri %stack.0.f, 0, 14, $noreg, $noreg +# CHECK: %1:rgpr = t2ADDri %stack.0.f, 0, 14, $noreg, $noreg # CHECK-NEXT: t2CMPrr %1, %0, 14, $noreg, implicit-def $cpsr # CHECK-NEXT: t2Bcc %bb.2, 3, $cpsr ... diff --git a/llvm/test/CodeGen/Thumb2/t2peephole-t2ADDrr-to-t2ADDri.ll b/llvm/test/CodeGen/Thumb2/t2peephole-t2ADDrr-to-t2ADDri.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/t2peephole-t2ADDrr-to-t2ADDri.ll @@ -0,0 +1,10 @@ +; RUN: llc -mtriple=thumb-eabi --stop-after=peephole-opt -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s +define i32 @t2_const_var2_1_ok_2(i32 %lhs) { +; CHECK: [[R0:%0|%[1-9][0-9]*]]:gprnopc = COPY $r0 +; CHECK-NEXT: [[R1:%0|%[1-9][0-9]*]]:rgpr = t2ADDri [[R0]], 11206656 +; CHECK-NEXT: [[R2:%0|%[1-9][0-9]*]]:rgpr = t2ADDri killed [[R1]], 187 +; CHECK-NEXT: $r0 = COPY [[R2]] + %ret = add i32 %lhs, 11206843 ; 0x00ab00bb + ret i32 %ret +} + diff --git a/llvm/test/MC/ARM/basic-thumb2-instructions.s b/llvm/test/MC/ARM/basic-thumb2-instructions.s --- a/llvm/test/MC/ARM/basic-thumb2-instructions.s +++ b/llvm/test/MC/ARM/basic-thumb2-instructions.s @@ -79,7 +79,6 @@ adds r2, r2, #56 adds r2, #56 add r1, r7, #0xcbcbcbcb - add sp, sp, #0x1fe0000 adds.w r2, #-16 adds.w r2, r2, #-16 @@ -103,7 +102,6 @@ @ CHECK: adds r2, #56 @ encoding: [0x38,0x32] @ CHECK: adds r2, #56 @ encoding: [0x38,0x32] @ CHECK: add.w r1, r7, #3419130827 @ encoding: [0x07,0xf1,0xcb,0x31] -@ CHECK: add.w sp, sp, #33423360 @ encoding: [0x0d,0xf1,0xff,0x7d] @ CHECK: subs.w r2, r2, #16 @ encoding: [0xb2,0xf1,0x10,0x02] @ CHECK: subs.w r2, r2, #16 @ encoding: [0xb2,0xf1,0x10,0x02] @@ -182,7 +180,102 @@ @ CHECK: addw r6, sp, #1020 @ encoding: [0x0d,0xf2,0xfc,0x36] add r6, sp, #1019 // T4 @ CHECK: addw r6, sp, #1019 @ encoding: [0x0d,0xf2,0xfb,0x36] - + addw r0, r0, #4095 + addw r0, #4095 + add r0, r0, #4095 + add r0, #4095 +@ CHECK-NEXT: addw r0, r0, #4095 @ encoding: [0x00,0xf6,0xff,0x70] +@ CHECK-NEXT: addw r0, r0, #4095 @ encoding: [0x00,0xf6,0xff,0x70] +@ CHECK-NEXT: addw r0, r0, #4095 @ encoding: [0x00,0xf6,0xff,0x70] +@ CHECK-NEXT: addw r0, r0, #4095 @ encoding: [0x00,0xf6,0xff,0x70] +add.w r0, r0, #-4096 +add r0, r0, #-4096 +add.w r0, #-4096 +add r0, #-4096 +@ CHECK-NEXT: sub.w r0, r0, #4096 @ encoding: [0xa0,0xf5,0x80,0x50] +@ CHECK-NEXT: sub.w r0, r0, #4096 @ encoding: [0xa0,0xf5,0x80,0x50] +@ CHECK-NEXT: sub.w r0, r0, #4096 @ encoding: [0xa0,0xf5,0x80,0x50] +@ CHECK-NEXT: sub.w r0, r0, #4096 @ encoding: [0xa0,0xf5,0x80,0x50] +adds.w r0, r0, #-4096 +adds r0, r0, #-4096 +adds.w r0, #-4096 +adds r0, #-4096 +@ CHECK-NEXT: subs.w r0, r0, #4096 @ encoding: [0xb0,0xf5,0x80,0x50] +@ CHECK-NEXT: subs.w r0, r0, #4096 @ encoding: [0xb0,0xf5,0x80,0x50] +@ CHECK-NEXT: subs.w r0, r0, #4096 @ encoding: [0xb0,0xf5,0x80,0x50] +@ CHECK-NEXT: subs.w r0, r0, #4096 @ encoding: [0xb0,0xf5,0x80,0x50] +@------------------------------------------------------------------------------ +@ ADD (SP plus immediate, writing to SP) +@------------------------------------------------------------------------------ + add.w sp, sp, #0x1fe0000 //T3 + add.w sp, #0x1fe0000 + add sp, sp, #0x1fe0000 + add sp, #0x1fe0000 +@ CHECK-NEXT: add.w sp, sp, #33423360 @ encoding: [0x0d,0xf1,0xff,0x7d] +@ CHECK-NEXT: add.w sp, sp, #33423360 @ encoding: [0x0d,0xf1,0xff,0x7d] +@ CHECK-NEXT: add.w sp, sp, #33423360 @ encoding: [0x0d,0xf1,0xff,0x7d] +@ CHECK-NEXT: add.w sp, sp, #33423360 @ encoding: [0x0d,0xf1,0xff,0x7d] + adds.w sp, sp, #0x1fe0000 //T3 + adds.w sp, #0x1fe0000 + adds sp, sp, #0x1fe0000 + adds sp, #0x1fe0000 +@ CHECK-NEXT: adds.w sp, sp, #33423360 @ encoding: [0x1d,0xf1,0xff,0x7d] +@ CHECK-NEXT: adds.w sp, sp, #33423360 @ encoding: [0x1d,0xf1,0xff,0x7d] +@ CHECK-NEXT: adds.w sp, sp, #33423360 @ encoding: [0x1d,0xf1,0xff,0x7d] +@ CHECK-NEXT: adds.w sp, sp, #33423360 @ encoding: [0x1d,0xf1,0xff,0x7d] + addw sp, sp, #4095 //T4 + add sp, sp, #4095 + addw sp, #4095 + add sp, #4095 +@ CHECK-NEXT: addw sp, sp, #4095 @ encoding: [0x0d,0xf6,0xff,0x7d] +@ CHECK-NEXT: addw sp, sp, #4095 @ encoding: [0x0d,0xf6,0xff,0x7d] +@ CHECK-NEXT: addw sp, sp, #4095 @ encoding: [0x0d,0xf6,0xff,0x7d] +@ CHECK-NEXT: addw sp, sp, #4095 @ encoding: [0x0d,0xf6,0xff,0x7d] + add sp, sp, #128 //T2 + add sp, #128 +@ CHECK-NEXT: add sp, #128 @ encoding: [0x20,0xb0] +@ CHECK-NEXT: add sp, #128 @ encoding: [0x20,0xb0] + adds sp, sp, #128 //T3 + adds sp, #128 +@ CHECK-NEXT: adds.w sp, sp, #128 @ encoding: [0x1d,0xf1,0x80,0x0d] +@ CHECK-NEXT: adds.w sp, sp, #128 @ encoding: [0x1d,0xf1,0x80,0x0d] + add r0, sp, #128 //T1 +@ CHECK-NEXT: add r0, sp, #128 @ encoding: [0x20,0xa8] + adds r0, sp, #128 //T3 +@ CHECK-NEXT: adds.w r0, sp, #128 @ encoding: [0x1d,0xf1,0x80,0x00] + addw r0, sp, #128 +@ CHECK-NEXT: addw r0, sp, #128 @ encoding: [0x0d,0xf2,0x80,0x00] +@------------------------------------------------------------------------------ +@ ADD (SP plus negative immediate, writing to SP) +@------------------------------------------------------------------------------ +add sp, sp, #-508 +add sp, #-508 +@ CHECK-NEXT: sub sp, #508 @ encoding: [0xff,0xb0] +@ CHECK-NEXT: sub sp, #508 @ encoding: [0xff,0xb0] +addw sp, sp, #-4095 +add sp, sp, #-4095 +addw sp, #-4095 +add sp, #-4095 +@ CHECK-NEXT: subw sp, sp, #4095 @ encoding: [0xad,0xf6,0xff,0x7d] +@ CHECK-NEXT: subw sp, sp, #4095 @ encoding: [0xad,0xf6,0xff,0x7d] +@ CHECK-NEXT: subw sp, sp, #4095 @ encoding: [0xad,0xf6,0xff,0x7d] +@ CHECK-NEXT: subw sp, sp, #4095 @ encoding: [0xad,0xf6,0xff,0x7d] +add.w sp, sp, #-4096 +add sp, sp, #-4096 +add.w sp, #-4096 +add sp, #-4096 +@ CHECK-NEXT: sub.w sp, sp, #4096 @ encoding: [0xad,0xf5,0x80,0x5d] +@ CHECK-NEXT: sub.w sp, sp, #4096 @ encoding: [0xad,0xf5,0x80,0x5d] +@ CHECK-NEXT: sub.w sp, sp, #4096 @ encoding: [0xad,0xf5,0x80,0x5d] +@ CHECK-NEXT: sub.w sp, sp, #4096 @ encoding: [0xad,0xf5,0x80,0x5d] +adds.w sp, sp, #-4096 +adds sp, sp, #-4096 +adds.w sp, #-4096 +adds sp, #-4096 +@ CHECK-NEXT: subs.w sp, sp, #4096 @ encoding: [0xbd,0xf5,0x80,0x5d] +@ CHECK-NEXT: subs.w sp, sp, #4096 @ encoding: [0xbd,0xf5,0x80,0x5d] +@ CHECK-NEXT: subs.w sp, sp, #4096 @ encoding: [0xbd,0xf5,0x80,0x5d] +@ CHECK-NEXT: subs.w sp, sp, #4096 @ encoding: [0xbd,0xf5,0x80,0x5d] @------------------------------------------------------------------------------ @ ADD (SP plus register) A8.8.10 @------------------------------------------------------------------------------ @@ -206,8 +299,22 @@ @ CHECK: it eq @ encoding: [0x08,0xbf] addeq r2, sp, ip // T3 @ CHECK: addeq.w r2, sp, r12 @ encoding: [0x0d,0xeb,0x0c,0x02] - - + add.w r0, sp, r0, ror #2 + add r0, sp, r0, ror #2 + add sp, r1, lsl #15 + adds.w r0, sp, r0, ror #2 + adds r0, sp, r0, ror #2 + adds.w sp, sp, r0, ror #31 + adds sp, sp, r0, ror #31 + adds sp, r0, ror #31 +@ CHECK-NEXT: add.w r0, sp, r0, ror #2 @ encoding: [0x0d,0xeb,0xb0,0x00] +@ CHECK-NEXT: add.w r0, sp, r0, ror #2 @ encoding: [0x0d,0xeb,0xb0,0x00] +@ CHECK-NEXT: add.w sp, sp, r1, lsl #15 @ encoding: [0x0d,0xeb,0xc1,0x3d] +@ CHECK-NEXT: adds.w r0, sp, r0, ror #2 @ encoding: [0x1d,0xeb,0xb0,0x00] +@ CHECK-NEXT: adds.w r0, sp, r0, ror #2 @ encoding: [0x1d,0xeb,0xb0,0x00] +@ CHECK-NEXT: adds.w sp, sp, r0, ror #31 @ encoding: [0x1d,0xeb,0xf0,0x7d] +@ CHECK-NEXT: adds.w sp, sp, r0, ror #31 @ encoding: [0x1d,0xeb,0xf0,0x7d] +@ CHECK-NEXT: adds.w sp, sp, r0, ror #31 @ encoding: [0x1d,0xeb,0xf0,0x7d] @------------------------------------------------------------------------------ @ FIXME: ADR @------------------------------------------------------------------------------ @@ -3083,7 +3190,10 @@ sub r0, r0, #32 subs r2, r2, #56 subs r2, #56 - + subw r0, r0, #4095 + subw r0, #4095 + sub r0, r0, #4095 + sub r0, #4095 @ CHECK: itet eq @ encoding: [0x0a,0xbf] @ CHECK: subeq r1, r2, #4 @ encoding: [0x11,0x1f] @ CHECK: subwne r5, r3, #1023 @ encoding: [0xa3,0xf2,0xff,0x35] @@ -3099,8 +3209,47 @@ @ CHECK: sub.w r0, r0, #32 @ encoding: [0xa0,0xf1,0x20,0x00] @ CHECK: subs r2, #56 @ encoding: [0x38,0x3a] @ CHECK: subs r2, #56 @ encoding: [0x38,0x3a] - - +@ CHECK-NEXT: subw r0, r0, #4095 @ encoding: [0xa0,0xf6,0xff,0x70] +@ CHECK-NEXT: subw r0, r0, #4095 @ encoding: [0xa0,0xf6,0xff,0x70] +@ CHECK-NEXT: subw r0, r0, #4095 @ encoding: [0xa0,0xf6,0xff,0x70] +@ CHECK-NEXT: subw r0, r0, #4095 @ encoding: [0xa0,0xf6,0xff,0x70] +@------------------------------------------------------------------------------ +@ SUB (immediate, writting to SP) +@------------------------------------------------------------------------------ + sub.w sp, sp, #0x1fe0000 //T2 + sub sp, sp, #0x1fe0000 + sub.w sp, #0x1fe0000 + sub sp, #0x1fe0000 +@ CHECK-NEXT: sub.w sp, sp, #33423360 @ encoding: [0xad,0xf1,0xff,0x7d] +@ CHECK-NEXT: sub.w sp, sp, #33423360 @ encoding: [0xad,0xf1,0xff,0x7d] +@ CHECK-NEXT: sub.w sp, sp, #33423360 @ encoding: [0xad,0xf1,0xff,0x7d] +@ CHECK-NEXT: sub.w sp, sp, #33423360 @ encoding: [0xad,0xf1,0xff,0x7d] + subs.w sp, sp, #0x1fe0000 //T2 + subs sp, sp, #0x1fe0000 + subs.w sp, #0x1fe0000 + subs sp, #0x1fe0000 +@ CHECK-NEXT: subs.w sp, sp, #33423360 @ encoding: [0xbd,0xf1,0xff,0x7d] +@ CHECK-NEXT: subs.w sp, sp, #33423360 @ encoding: [0xbd,0xf1,0xff,0x7d] +@ CHECK-NEXT: subs.w sp, sp, #33423360 @ encoding: [0xbd,0xf1,0xff,0x7d] +@ CHECK-NEXT: subs.w sp, sp, #33423360 @ encoding: [0xbd,0xf1,0xff,0x7d] + subw sp, sp, #4095 //T3 + sub sp, sp, #4095 + subw sp, #4095 + sub sp, #4095 +@ CHECK-NEXT: subw sp, sp, #4095 @ encoding: [0xad,0xf6,0xff,0x7d] +@ CHECK-NEXT: subw sp, sp, #4095 @ encoding: [0xad,0xf6,0xff,0x7d] +@ CHECK-NEXT: subw sp, sp, #4095 @ encoding: [0xad,0xf6,0xff,0x7d] +@ CHECK-NEXT: subw sp, sp, #4095 @ encoding: [0xad,0xf6,0xff,0x7d] + sub sp, #128 //T1 +@ CHECK-NEXT: sub sp, #128 @ encoding: [0xa0,0xb0] + subs.w sp, #128 //T2 + subs sp, #128 //T2 +@ CHECK-NEXT: subs.w sp, sp, #128 @ encoding: [0xbd,0xf1,0x80,0x0d] +@ CHECK-NEXT: subs.w sp, sp, #128 @ encoding: [0xbd,0xf1,0x80,0x0d] + sub.w sp, #128 //T2 +@ CHECK-NEXT: sub.w sp, sp, #128 @ encoding: [0xad,0xf1,0x80,0x0d] + subw sp, #128 //T4 +@ CHECK-NEXT: subw sp, sp, #128 @ encoding: [0xad,0xf2,0x80,0x0d] @------------------------------------------------------------------------------ @ SUB (register) @------------------------------------------------------------------------------ diff --git a/llvm/test/MC/ARM/invalid-addsub.s b/llvm/test/MC/ARM/invalid-addsub.s --- a/llvm/test/MC/ARM/invalid-addsub.s +++ b/llvm/test/MC/ARM/invalid-addsub.s @@ -1,20 +1,68 @@ -@ RUN: not llvm-mc -triple thumbv7-apple-ios %s -o - 2>&1 | FileCheck %s - -@ CHECK: error: source register must be sp if destination is sp -@ CHECK: error: source register must be sp if destination is sp -@ CHECK: error: source register must be sp if destination is sp -@ CHECK: error: source register must be sp if destination is sp +@ RUN: not llvm-mc -triple thumbv7-apple-ios %s -o /dev/null 2>&1 | FileCheck %s add sp, r5, #1 addw sp, r7, #4 add sp, r3, r2 add sp, r3, r5, lsl #3 - - -@ CHECK: error: source register must be sp if destination is sp -@ CHECK: error: source register must be sp if destination is sp -@ CHECK: error: source register must be sp if destination is sp -@ CHECK: error: source register must be sp if destination is sp sub sp, r5, #1 subw sp, r7, #4 sub sp, r3, r2 sub sp, r3, r5, lsl #3 +@CHECK: error: invalid instruction, any one of the following would fix this: +@CHECK-NEXT: add sp, r5, #1 +@CHECK-NEXT: ^ +@CHECK-NEXT: note: invalid operand for instruction +@CHECK-NEXT: add sp, r5, #1 +@CHECK-NEXT: ^ +@CHECK-NEXT: note: operand must be a register in range [r0, r12] or r14 +@CHECK-NEXT: add sp, r5, #1 +@CHECK-NEXT: ^ +@CHECK-NEXT: note: operand must be a register in range [r0, r12] or r14 +@CHECK-NEXT: add sp, r5, #1 +@CHECK-NEXT: ^ +@CHECK-NEXT: note: operand must be a register sp +@CHECK-NEXT: add sp, r5, #1 +@CHECK-NEXT: ^ +@CHECK-NEXT: error: invalid instruction, any one of the following would fix this: +@CHECK-NEXT: addw sp, r7, #4 +@CHECK-NEXT: ^ +@CHECK-NEXT: note: operand must be a register in range [r0, r12] or r14 +@CHECK-NEXT: addw sp, r7, #4 +@CHECK-NEXT: ^ +@CHECK-NEXT: note: operand must be a register sp +@CHECK-NEXT: addw sp, r7, #4 +@CHECK-NEXT: ^ +@CHECK-NEXT: error: source register must be sp if destination is sp +@CHECK-NEXT: add sp, r3, r2 +@CHECK-NEXT: ^ +@CHECK-NEXT: error: source register must be sp if destination is sp +@CHECK-NEXT: add sp, r3, r5, lsl #3 +@CHECK-NEXT: ^ +@CHECK-NEXT: error: invalid instruction, any one of the following would fix this: +@CHECK-NEXT: sub sp, r5, #1 +@CHECK-NEXT: ^ +@CHECK-NEXT: note: invalid operand for instruction +@CHECK-NEXT: sub sp, r5, #1 +@CHECK-NEXT: ^ +@CHECK-NEXT: note: operand must be a register in range [r0, r12] or r14 +@CHECK-NEXT: sub sp, r5, #1 +@CHECK-NEXT: ^ +@CHECK-NEXT: note: operand must be a register in range [r0, r12] or r14 +@CHECK-NEXT: sub sp, r5, #1 +@CHECK-NEXT: ^ +@CHECK-NEXT: note: operand must be a register sp +@CHECK-NEXT: sub sp, r5, #1 +@CHECK-NEXT: ^ +@CHECK-NEXT: error: invalid instruction, any one of the following would fix this: +@CHECK-NEXT: subw sp, r7, #4 +@CHECK-NEXT: ^ +@CHECK-NEXT: note: operand must be a register in range [r0, r12] or r14 +@CHECK-NEXT: subw sp, r7, #4 +@CHECK-NEXT: ^ +@CHECK-NEXT: note: operand must be a register sp +@CHECK-NEXT: subw sp, r7, #4 +@CHECK-NEXT: ^ +@CHECK-NEXT: error: source register must be sp if destination is sp +@CHECK-NEXT: sub sp, r3, r2 +@CHECK-NEXT: ^ +@CHECK-NEXT: error: source register must be sp if destination is sp +@CHECK-NEXT: sub sp, r3, r5, lsl #3 diff --git a/llvm/test/MC/ARM/negative-immediates.s b/llvm/test/MC/ARM/negative-immediates.s --- a/llvm/test/MC/ARM/negative-immediates.s +++ b/llvm/test/MC/ARM/negative-immediates.s @@ -51,7 +51,7 @@ .thumb ADD r0, r1, #0xFFFFFF00 -# CHECK: subw r0, r1, #256 +# CHECK: sub.w r0, r1, #256 # CHECK-DISABLED: note: instruction requires: NegativeImmediates # CHECK-DISABLED: ADD ADDS r0, r1, #0xFFFFFF00 @@ -175,7 +175,7 @@ # CHECK-DISABLED: note: instruction requires: NegativeImmediates # CHECK-DISABLED: SUB.W SUB r0, r1, #0xFFFFFF00 -# CHECK: addw r0, r1, #256 +# CHECK: add.w r0, r1, #256 # CHECK-DISABLED: note: instruction requires: NegativeImmediates # CHECK-DISABLED: SUB SUBS r0, r1, #0xFFFFFF00 @@ -188,6 +188,6 @@ # CHECK-DISABLED: SUBS.W ADD r0, r1, #-13 -# CHECK: subw r0, r1, #13 +# CHECK: sub.w r0, r1, #13 # CHECK-DISABLED: note: instruction requires: NegativeImmediates # CHECK-DISABLED: ADD diff --git a/llvm/test/MC/ARM/register-token-source-loc.s b/llvm/test/MC/ARM/register-token-source-loc.s --- a/llvm/test/MC/ARM/register-token-source-loc.s +++ b/llvm/test/MC/ARM/register-token-source-loc.s @@ -1,12 +1,11 @@ // RUN: not llvm-mc -triple armv6m--none-eabi < %s 2>&1 | FileCheck %s - -// Some of these CHECK lines need to uses regexes to that the amount of -// whitespace between the start of the line and the caret is significant. - add sp, r0, #4 -// CHECK: error: invalid instruction, any one of the following would fix this: -// CHECK: note: instruction requires: thumb2 -// CHECK: note: operand must be a register sp -// CHECK-NEXT: {{^ add sp, r0, #4}} -// CHECK-NEXT: {{^ \^}} -// CHECK: note: too many operands for instruction +// CHECK: error: invalid instruction, any one of the following would fix this: +// CHECK-NEXT: add sp, r0, #4 +// CHECK-NEXT: ^ +// CHECK-NEXT: note: operand must be a register sp +// CHECK-NEXT: add sp, r0, #4 +// CHECK-NEXT: ^ +// CHECK-NEXT: note: too many operands for instruction +// CHECK-NEXT: add sp, r0, #4 +// CHECK-NEXT: ^ diff --git a/llvm/test/MC/ARM/thumb-diagnostics.s b/llvm/test/MC/ARM/thumb-diagnostics.s --- a/llvm/test/MC/ARM/thumb-diagnostics.s +++ b/llvm/test/MC/ARM/thumb-diagnostics.s @@ -244,24 +244,44 @@ add sp, sp, #512 add r2, sp, #1024 @ CHECK-ERRORS: error: invalid instruction, any one of the following would fix this: -@ CHECK-ERRORS: add sp, #-1 -@ CHECK-ERRORS: ^ +@ CHECK-ERRORS: add sp, #-1 +@ CHECK-ERRORS: ^ @ CHECK-ERRORS: note: operand must be a register in range [r0, r15] +@ CHECK-ERRORS: add sp, #-1 +@ CHECK-ERRORS: ^ +@ CHECK-ERRORS: note: invalid operand for instruction +@ CHECK-ERRORS: add sp, #-1 +@ CHECK-ERRORS: ^ @ CHECK-ERRORS: note: instruction requires: thumb2 +@ CHECK-ERRORS: add sp, #-1 +@ CHECK-ERRORS: ^ @ CHECK-ERRORS: error: invalid instruction, any one of the following would fix this: -@ CHECK-ERRORS: add sp, #3 -@ CHECK-ERRORS: ^ +@ CHECK-ERRORS: add sp, #3 +@ CHECK-ERRORS: ^ @ CHECK-ERRORS: note: operand must be a register in range [r0, r15] +@ CHECK-ERRORS: add sp, #3 +@ CHECK-ERRORS: ^ +@ CHECK-ERRORS: note: invalid operand for instruction +@ CHECK-ERRORS: add sp, #3 +@ CHECK-ERRORS: ^ @ CHECK-ERRORS: note: instruction requires: thumb2 +@ CHECK-ERRORS: add sp, #3 +@ CHECK-ERRORS: ^ @ CHECK-ERRORS: error: invalid instruction, any one of the following would fix this: -@ CHECK-ERRORS: add sp, sp, #512 -@ CHECK-ERRORS: ^ +@ CHECK-ERRORS: add sp, sp, #512 +@ CHECK-ERRORS: ^ @ CHECK-ERRORS: note: operand must be a register in range [r0, r15] +@ CHECK-ERRORS: add sp, sp, #512 +@ CHECK-ERRORS: ^ +@ CHECK-ERRORS: note: invalid operand for instruction +@ CHECK-ERRORS: add sp, sp, #512 +@ CHECK-ERRORS: ^ @ CHECK-ERRORS: note: instruction requires: thumb2 +@ CHECK-ERRORS: add sp, sp, #512 +@ CHECK-ERRORS: ^ @ CHECK-ERRORS: error: instruction requires: thumb2 -@ CHECK-ERRORS: add r2, sp, #1024 -@ CHECK-ERRORS: ^ - +@ CHECK-ERRORS: add r2, sp, #1024 +@ CHECK-ERRORS: ^ add r2, sp, ip @ CHECK-ERRORS: error: source register must be the same as destination @ CHECK-ERRORS: add r2, sp, ip diff --git a/llvm/test/MC/Disassembler/ARM/invalid-thumbv7.txt b/llvm/test/MC/Disassembler/ARM/invalid-thumbv7.txt --- a/llvm/test/MC/Disassembler/ARM/invalid-thumbv7.txt +++ b/llvm/test/MC/Disassembler/ARM/invalid-thumbv7.txt @@ -425,3 +425,8 @@ # CHECK-V7-NEXT: [0xa3,0xeb,0x02,0x0d] # CHECK-V7: warning: potentially undefined instruction encoding # CHECK-V7-NEXT: [0xa3,0xeb,0xc5,0x0d] +# CHECK-V7-NEXT: ^ +[0x0f,0xf2,0x00,0x4d] +# CHECK-V7-NEXT: warning: potentially undefined instruction encoding +# CHECK-V7-NEXT: [0x0f,0xf2,0x00,0x4d] +# CHECK-V7-NEXT: ^ diff --git a/llvm/test/MC/Disassembler/ARM/thumb-tests.txt b/llvm/test/MC/Disassembler/ARM/thumb-tests.txt --- a/llvm/test/MC/Disassembler/ARM/thumb-tests.txt +++ b/llvm/test/MC/Disassembler/ARM/thumb-tests.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 -mcpu=cortex-a9 | FileCheck %s +# RUN: llvm-mc --disassemble --show-encoding %s -triple=thumbv7-apple-darwin9 -mcpu=cortex-a9 | FileCheck %s # CHECK: add r5, sp, #68 0x11 0xad @@ -84,9 +84,12 @@ # CHECK: sub sp, #60 0x8f 0xb0 -# CHECK: subw r0, pc, #1 +# CHECK: adr.w r0, #-1 0xaf 0xf2 0x01 0x00 +# CHECK: subw r0, pc, #0 +0xaf 0xf2 0x00 0x00 + # CHECK: subw r0, sp, #835 0xad 0xf2 0x43 0x30 @@ -155,7 +158,7 @@ # CHECK: ldm r5!, {r0, r1, r2, r3, r4} 0x1f 0xcd -# CHECK: addw r0, pc, #1050 +# CHECK: adr.w r0, #1050 0x0f 0xf2 0x1a 0x40 # CHECK: ldrd r3, r8, [r11, #-60] diff --git a/llvm/test/MC/Disassembler/ARM/thumb2-v8.txt b/llvm/test/MC/Disassembler/ARM/thumb2-v8.txt --- a/llvm/test/MC/Disassembler/ARM/thumb2-v8.txt +++ b/llvm/test/MC/Disassembler/ARM/thumb2-v8.txt @@ -38,3 +38,5 @@ 0x90 0xec 0x00 0x0e # CHECK: ldc p14 +[0x0f,0xf2,0x00,0x4d] +# CHECK: adr.w sp, #1024 diff --git a/llvm/test/MC/Disassembler/ARM/thumb2.txt b/llvm/test/MC/Disassembler/ARM/thumb2.txt --- a/llvm/test/MC/Disassembler/ARM/thumb2.txt +++ b/llvm/test/MC/Disassembler/ARM/thumb2.txt @@ -90,14 +90,14 @@ #------------------------------------------------------------------------------ # ADR #------------------------------------------------------------------------------ -# CHECK: subw r11, pc, #3270 -# CHECK: subw r11, pc, #826 -# CHECK: subw r1, pc, #0 - +# CHECK: adr.w r11, #-3270 +# CHECK-NEXT: adr.w r11, #-826 +# CHECK-NEXT: subw r1, pc, #0 +# CHECK-NEXT: adr.w r0, #1024 0xaf 0xf6 0xc6 0x4b 0xaf 0xf2 0x3a 0x3b 0xaf 0xf2 0x00 0x01 - +0x0f,0xf2,0x00,0x40 #------------------------------------------------------------------------------ # AND (immediate) #------------------------------------------------------------------------------ diff --git a/llvm/test/tools/llvm-mca/ARM/simple-cortex-m33.s b/llvm/test/tools/llvm-mca/ARM/simple-cortex-m33.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/ARM/simple-cortex-m33.s @@ -0,0 +1,26 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=thumbv7 --mcpu=cortex-m33 -instruction-tables -o - %s | FileCheck %s + +sub sp, #4 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 1.00 U sub sp, #4 + +# CHECK: Resources: +# CHECK-NEXT: [0] - M4Unit + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] +# CHECK-NEXT: 1.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] Instructions: +# CHECK-NEXT: 1.00 sub sp, #4