diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -1589,9 +1589,9 @@
 }
 
 class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin,
-            string opc, string asm, list<dag> pattern>
+            string opc, string asm, string cstr, list<dag> pattern>
   : VFPI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
-         opc, asm, "", pattern> {
+         opc, asm, cstr, pattern> {
   let PostEncoderMethod = "VFPThumb2PostEncoder";
 }
@@ -1751,8 +1751,8 @@
 // Double precision, unary
 class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
            bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
-           string asm, list<dag> pattern>
-  : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+           string asm, string cstr, list<dag> pattern>
+  : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, cstr, pattern> {
   // Instruction operands.
   bits<5> Dd;
   bits<5> Dm;
@@ -1804,7 +1804,7 @@
 class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
            dag iops, InstrItinClass itin, string opc, string asm,
            list<dag> pattern>
-  : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+  : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, "", pattern> {
   // Instruction operands.
   bits<5> Dd;
   bits<5> Dn;
@@ -1862,8 +1862,8 @@
 // Single precision, unary, predicated
 class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
            bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
-           string asm, list<dag> pattern>
-  : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+           string asm, string cstr, list<dag> pattern>
+  : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, cstr, pattern> {
   // Instruction operands.
   bits<5> Sd;
   bits<5> Sm;
@@ -1916,14 +1916,14 @@
             bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
             string asm, list<dag> pattern>
   : ASuI<opcod1, opcod2, opcod3, opcod4, opcod5, oops, iops, itin, opc, asm,
-         pattern> {
+         "", pattern> {
   list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
 }
 
 // Single precision, binary
 class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
            InstrItinClass itin, string opc, string asm, list<dag> pattern>
-  : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+  : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, "", pattern> {
   // Instruction operands.
   bits<5> Sd;
   bits<5> Sn;
@@ -2000,7 +2000,7 @@
 class AHuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
            bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
            string asm, list<dag> pattern>
-  : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+  : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, "", pattern> {
   list<Predicate> Predicates = [HasFullFP16];
 
   // Instruction operands.
@@ -2056,7 +2056,7 @@
 // Half precision, binary
 class AHbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
           InstrItinClass itin, string opc, string asm, list<dag> pattern>
-  : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+  : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, "", pattern> {
   list<Predicate> Predicates = [HasFullFP16];
 
   // Instruction operands.
@@ -2116,7 +2116,7 @@
 class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
                dag oops, dag iops, InstrItinClass itin, string opc, string asm,
                list<dag> pattern>
-  : VFPAI<oops, iops, VFPConv1Frm, itin, opc, asm, pattern> {
+  : VFPAI<oops, iops, VFPConv1Frm, itin, opc, asm, "", pattern> {
   let Inst{27-23} = opcod1;
   let Inst{21-20} = opcod2;
   let Inst{19-16} = opcod3;
@@ -2149,7 +2149,7 @@
 class AVConvXI<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, Format f,
                InstrItinClass itin, string opc, string asm, list<dag> pattern>
-  : VFPAI<oops, iops, f, itin, opc, asm, pattern> {
+  : VFPAI<oops, iops, f, itin, opc, asm, "", pattern> {
   let Inst{27-20} = opcod1;
   let Inst{11-8} = opcod2;
   let Inst{4} = 1;
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -584,12 +584,12 @@
 let Defs = [FPSCR_NZCV] in {
 def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
                   (outs), (ins DPR:$Dd, DPR:$Dm),
-                  IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
+                  IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", "",
                   [(arm_cmpfpe DPR:$Dd, (f64 DPR:$Dm))]>;
 
 def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
                   (outs), (ins SPR:$Sd, SPR:$Sm),
-                  IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
+                  IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", "",
                   [(arm_cmpfpe SPR:$Sd, SPR:$Sm)]> {
   // Some single precision VFP instructions may be executed on both NEON and
   // VFP pipelines on A8.
@@ -603,12 +603,12 @@
 
 def VCMPD  : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
                   (outs), (ins DPR:$Dd, DPR:$Dm),
-                  IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
+                  IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", "",
                   [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
 
 def VCMPS  : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
                   (outs), (ins SPR:$Sd, SPR:$Sm),
-                  IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
+                  IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", "",
                   [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
   // Some single precision VFP instructions may be executed on both NEON and
   // VFP pipelines on A8.
@@ -627,7 +627,7 @@
 
 def VABSD  : ADuI<0b11101, 0b11, 0b0000, 0b11, 0,
                   (outs DPR:$Dd), (ins DPR:$Dm),
-                  IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm",
+                  IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm", "",
                   [(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>;
 
 def VABSS  : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
@@ -647,7 +647,7 @@
 let Defs = [FPSCR_NZCV] in {
 def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
                    (outs), (ins DPR:$Dd),
-                   IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
+                   IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", "",
                    [(arm_cmpfpe0 (f64 DPR:$Dd))]> {
   let Inst{3-0} = 0b0000;
   let Inst{5}   = 0;
@@ -655,7 +655,7 @@
 
 def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
                    (outs), (ins SPR:$Sd),
-                   IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
+                   IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", "",
                    [(arm_cmpfpe0 SPR:$Sd)]> {
   let Inst{3-0} = 0b0000;
   let Inst{5}   = 0;
@@ -675,7 +675,7 @@
 
 def VCMPZD  : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
                    (outs), (ins DPR:$Dd),
-                   IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
+                   IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", "",
                    [(arm_cmpfp0 (f64 DPR:$Dd))]> {
   let Inst{3-0} = 0b0000;
   let Inst{5}   = 0;
@@ -683,7 +683,7 @@
 
 def VCMPZS  : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
                    (outs), (ins SPR:$Sd),
-                   IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
+                   IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", "",
                    [(arm_cmpfp0 SPR:$Sd)]> {
   let Inst{3-0} = 0b0000;
   let Inst{5}   = 0;
@@ -704,7 +704,7 @@
 
 def VCVTDS  : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
                    (outs DPR:$Dd), (ins SPR:$Sm),
-                   IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
+                   IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm", "",
                    [(set DPR:$Dd, (fpextend SPR:$Sm))]>,
              Sched<[WriteFPCVT]> {
   // Instruction operands.
@@ -723,7 +723,7 @@
 // Special case encoding: bits 11-8 is 0b1011.
 def VCVTSD  : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
-                    IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
+                    IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm", "",
                     [(set SPR:$Sd, (fpround DPR:$Dm))]>,
               Sched<[WriteFPCVT]> {
   // Instruction operands.
@@ -749,7 +749,7 @@
 // Between half, single and double-precision.
 let hasSideEffects = 0 in
 def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
-                 /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
+                 /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", "",
                  [/* Intentionally left blank, see patterns below */]>,
                  Requires<[HasFP16]>,
                  Sched<[WriteFPCVT]>;
@@ -760,26 +760,30 @@
               (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
 
 let hasSideEffects = 0 in
-def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
-                 /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
+def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sda, SPR:$Sm),
+                 /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", "$Sd = $Sda",
                  [/* Intentionally left blank, see patterns below */]>,
                  Requires<[HasFP16]>,
                  Sched<[WriteFPCVT]>;
 
 def : FP16Pat<(f16 (fpround SPR:$Sm)),
-              (COPY_TO_REGCLASS (VCVTBSH SPR:$Sm), HPR)>;
+              (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$Sm), HPR)>;
 def : FP16Pat<(fp_to_f16 SPR:$a),
-              (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+              (i32 (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$a), GPR))>;
 def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane),
-              (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), (VCVTBSH SPR:$src2),
+              (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1),
+                                    (VCVTBSH (EXTRACT_SUBREG (v8f16 MQPR:$src1), (SSubReg_f16_reg imm:$lane)),
+                                             SPR:$src2),
                                     (SSubReg_f16_reg imm:$lane)))>;
 def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane),
-              (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTBSH SPR:$src2),
+              (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1),
+                                    (VCVTBSH (EXTRACT_SUBREG (v4f16 DPR:$src1), (SSubReg_f16_reg imm:$lane)),
+                                             SPR:$src2),
                                     (SSubReg_f16_reg imm:$lane)))>;
 
 let hasSideEffects = 0 in
 def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
-                 /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
+                 /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", "",
                  [/* Intentionally left blank, see patterns below */]>,
                  Requires<[HasFP16]>,
                  Sched<[WriteFPCVT]>;
@@ -792,22 +796,26 @@
                                     (SSubReg_f16_reg imm_odd:$lane)))>;
 
 let hasSideEffects = 0 in
-def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
-                 /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
+def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sda, SPR:$Sm),
+                 /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", "$Sd = $Sda",
                  [/* Intentionally left blank, see patterns below */]>,
                  Requires<[HasFP16]>,
                  Sched<[WriteFPCVT]>;
 
 def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane),
-              (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), (VCVTTSH SPR:$src2),
+              (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1),
+                                    (VCVTTSH (EXTRACT_SUBREG (v8f16 MQPR:$src1), (SSubReg_f16_reg imm:$lane)),
+                                             SPR:$src2),
                                     (SSubReg_f16_reg imm:$lane)))>;
 def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane),
-              (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTTSH SPR:$src2),
+              (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1),
+                                    (VCVTTSH (EXTRACT_SUBREG (v4f16 DPR:$src1), (SSubReg_f16_reg imm:$lane)),
+                                             SPR:$src2),
                                     (SSubReg_f16_reg imm:$lane)))>;
 
 def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
                    (outs DPR:$Dd), (ins SPR:$Sm),
-                   NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
+                   NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", "",
                    [/* Intentionally left blank, see patterns below */]>,
                    Requires<[HasFPARMv8, HasDPVFP]>,
                    Sched<[WriteFPCVT]> {
@@ -829,8 +837,8 @@
                     Requires<[HasFPARMv8,
                               HasDPVFP]>;
 
 def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
-                   (outs SPR:$Sd), (ins DPR:$Dm),
-                   NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm",
+                   (outs SPR:$Sd), (ins SPR:$Sda, DPR:$Dm),
+                   NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm", "$Sd = $Sda",
                    [/* Intentionally left blank, see patterns below */]>,
                    Requires<[HasFPARMv8, HasDPVFP]> {
   // Instruction operands.
@@ -847,15 +855,15 @@
 }
 
 def : FullFP16Pat<(f16 (fpround DPR:$Dm)),
-                  (COPY_TO_REGCLASS (VCVTBDH DPR:$Dm), HPR)>,
+                  (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$Dm), HPR)>,
                   Requires<[HasFPARMv8, HasDPVFP]>;
 def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
-              (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>,
+              (i32 (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$a), GPR))>,
               Requires<[HasFPARMv8, HasDPVFP]>;
 
 def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
                    (outs DPR:$Dd), (ins SPR:$Sm),
-                   NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm",
+                   NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm", "",
                    []>, Requires<[HasFPARMv8, HasDPVFP]> {
   // Instruction operands.
   bits<5> Sm;
@@ -868,8 +876,8 @@
 }
 
 def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0,
-                   (outs SPR:$Sd), (ins DPR:$Dm),
-                   NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm",
+                   (outs SPR:$Sd), (ins SPR:$Sda, DPR:$Dm),
+                   NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm", "$Sd = $Sda",
                    []>, Requires<[HasFPARMv8, HasDPVFP]> {
   // Instruction operands.
   bits<5> Sd;
@@ -990,7 +998,7 @@
 
 def VNEGD  : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
                   (outs DPR:$Dd), (ins DPR:$Dm),
-                  IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
+                  IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", "",
                   [(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>;
 
 def VNEGS  : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
@@ -1019,7 +1027,7 @@
 
   def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0,
                (outs SPR:$Sd), (ins SPR:$Sm),
-               NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm",
+               NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", "",
               [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>,
                Requires<[HasFPARMv8]> {
     let Inst{7} = op2;
   }
@@ -1027,7 +1035,7 @@
   def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0,
                (outs DPR:$Dd), (ins DPR:$Dm),
-               NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm",
+               NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", "",
               [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>,
                Requires<[HasFPARMv8, HasDPVFP]> {
     let Inst{7} = op2;
   }
@@ -1094,13 +1102,13 @@
 
 def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
                   (outs DPR:$Dd), (ins DPR:$Dm),
-                  IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
+                  IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", "",
                   [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>,
              Sched<[WriteFPSQRT64]>;
 
 def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
                   (outs SPR:$Sd), (ins SPR:$Sm),
-                  IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
+                  IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", "",
                   [(set SPR:$Sd, (fsqrt SPR:$Sm))]>,
              Sched<[WriteFPSQRT32]>;
@@ -1113,12 +1121,12 @@
 let isMoveReg = 1 in {
 def VMOVD  : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
                   (outs DPR:$Dd), (ins DPR:$Dm),
-                  IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>,
+                  IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", "", []>,
              Requires<[HasFPRegs64]>;
 
 def VMOVS  : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
                   (outs SPR:$Sd), (ins SPR:$Sm),
-                  IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>,
+                  IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", "", []>,
              Requires<[HasFPRegs]>;
 } // isMoveReg
@@ -1984,7 +1992,7 @@
 class BF16_VCVT<string opc, bits<2> op7_6>
   : VFPAI<(outs SPR:$Sd), (ins SPR:$dst, SPR:$Sm),
           VFPUnaryFrm, NoItinerary,
-          opc, ".bf16.f32\t$Sd, $Sm", []>,
+          opc, ".bf16.f32\t$Sd, $Sm", "", []>,
     RegConstraint<"$dst = $Sd">,
     Requires<[HasBF16]>,
     Sched<[]> {
@@ -2440,7 +2448,7 @@
 class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
                  list<dag> pattern>:
-  VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+  VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, "", pattern> {
 
   // Instruction operand.
   bits<4> Rt;
@@ -2525,7 +2533,7 @@
 
 class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
                list<dag> pattern>:
-  VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+  VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, "", pattern> {
 
   // Instruction operand.
   bits<4> Rt;
@@ -2598,7 +2606,7 @@
 let isReMaterializable = 1 in {
 def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
                     VFPMiscFrm, IIC_fpUNA64,
-                    "vmov", ".f64\t$Dd, $imm",
+                    "vmov", ".f64\t$Dd, $imm", "",
                     [(set DPR:$Dd, vfp_f64imm:$imm)]>,
               Requires<[HasVFP3,HasDPVFP]> {
   bits<5> Dd;
@@ -2617,7 +2625,7 @@
 
 def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
                     VFPMiscFrm, IIC_fpUNA32,
-                    "vmov", ".f32\t$Sd, $imm",
+                    "vmov", ".f32\t$Sd, $imm", "",
                     [(set SPR:$Sd, vfp_f32imm:$imm)]>,
               Requires<[HasVFP3]> {
   bits<5> Sd;
   bits<8> imm;
@@ -2635,7 +2643,7 @@
 
 def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm),
                     VFPMiscFrm, IIC_fpUNA16,
-                    "vmov", ".f16\t$Sd, $imm",
+                    "vmov", ".f16\t$Sd, $imm", "",
                     [(set (f16 HPR:$Sd), vfp_f16imm:$imm)]>,
               Requires<[HasFullFP16]> {
   bits<5> Sd;
diff --git a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
--- a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
+++ b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
@@ -1418,12 +1418,11 @@
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r1, d0
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s5, s5
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, r7, d1
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s14, s14
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s12
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s8, s8
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r1
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r1, r1, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s3, s1
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s1
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r0
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r0, r0, #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s13, r1
@@ -1436,31 +1435,32 @@
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s2, r3
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s5, s15
-; CHECK-FIX-NOSCHED-NEXT:    vmov s9, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s11, s11
+; CHECK-FIX-NOSCHED-NEXT:    vmov s3, r3
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s5
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s11, s11
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s13, s13
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s5, s9
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s2, s2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s9
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s3, s3
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s2, s2
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s0, r7
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r7, r7, #16
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s0, s0
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r7
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s1, s1
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s1, s1
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r1, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s11
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r0
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r3, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s3
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s14
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s14
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s12
+; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s12
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s13
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s12
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r0, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s5
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[1], r0
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r1, r1, r3, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r1
@@ -1469,7 +1469,7 @@
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s8
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r3, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s2, s9
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s2, s3
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s2
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s2, s4
@@ -1746,13 +1746,12 @@
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r2, d2
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s1, s1
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, r7, d3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s14, s14
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s12
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s8, s8
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s0, s0
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r2
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r2, r2, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s7, s5
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s5
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r0
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r0, r0, #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s13, r2
@@ -1765,31 +1764,32 @@
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s6, r3
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s1, s15
-; CHECK-FIX-NOSCHED-NEXT:    vmov s9, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s11, s11
+; CHECK-FIX-NOSCHED-NEXT:    vmov s7, r3
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s1
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s11, s11
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s13, s13
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s1, s9
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s6, s6
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s9
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s7, s7
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s6, s6
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s4, r7
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r7, r7, #16
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s4, s4
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r7
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s5, s5
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s4, s4
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s5, s5
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r2, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s11
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r0
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r3, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s7
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s14
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s14
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s12
+; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s12
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s13
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s12
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r0, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s1
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[1], r0
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r2, r2, r3, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r2
@@ -1798,7 +1798,7 @@
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s8
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r3, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s6
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s6, s9
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s6, s7
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s6
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r3, lsl #16
@@ -3788,12 +3788,11 @@
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r1, d0
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s5, s5
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, r7, d1
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s14, s14
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s12
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s8, s8
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r1
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r1, r1, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s3, s1
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s1
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r0
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r0, r0, #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s13, r1
@@ -3806,31 +3805,32 @@
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s2, r3
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s5, s15
-; CHECK-FIX-NOSCHED-NEXT:    vmov s9, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s11, s11
+; CHECK-FIX-NOSCHED-NEXT:    vmov s3, r3
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s5
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s11, s11
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s13, s13
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s5, s9
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s2, s2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s9
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s3, s3
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s2, s2
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s0, r7
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r7, r7, #16
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s0, s0
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s1, r7
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s1, s1
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s1, s1
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r1, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s11
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r0
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r3, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s3
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s14
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s14
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s12
+; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s12
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s13
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s12
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r0, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s5
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[1], r0
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r1, r1, r3, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r1
@@ -3839,7 +3839,7 @@
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s8
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r3, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r1, s2
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s2, s9
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s2, s3
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s2
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s2, s4
@@ -4116,13 +4116,12 @@
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r2, d2
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s1, s1
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, r7, d3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s14, s14
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s12
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s8, s8
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s0, s0
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r2
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r2, r2, #16
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s7, s5
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s5
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r0
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r0, r0, #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s13, r2
@@ -4135,31 +4134,32 @@
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s6, r3
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s1, s15
-; CHECK-FIX-NOSCHED-NEXT:    vmov s9, r3
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s11, s11
+; CHECK-FIX-NOSCHED-NEXT:    vmov s7, r3
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s1
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s11, s11
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s13, s13
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s1, s9
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s6, s6
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s9, s9
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s7, s7
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s6, s6
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s4, r7
 ; CHECK-FIX-NOSCHED-NEXT:    lsr r7, r7, #16
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s4, s4
 ; CHECK-FIX-NOSCHED-NEXT:    vmov s5, r7
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s5, s5
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s4, s4
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f32.f16 s5, s5
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r2, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s11
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r0
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r3, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s12
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s7
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s14
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[0], r0
-; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s14
-; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r0, lsl #16
-; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s12
+; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s12
 ; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s12, s13
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s12
+; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r0, lsl #16
+; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s1
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[1], r0
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r2, r2, r3, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r2
@@ -4168,7 +4168,7 @@
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s8
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r3, lsl #16
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r2, s6
-; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s6, s9
+; CHECK-FIX-NOSCHED-NEXT:    vcvtb.f16.f32 s6, s7
 ; CHECK-FIX-NOSCHED-NEXT:    vmov r3, s6
 ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
 ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r3, lsl #16
diff --git a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll
--- a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll
@@ -804,8 +804,8 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r7, lr}
 ; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
-; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    vmov q5, q0
 ; CHECK-NEXT:    vmov q4, q1
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s20
@@ -821,52 +821,52 @@
 ; CHECK-NEXT:    mov r0, r2
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s2, s16
-; CHECK-NEXT:    vcvtt.f16.f32 s16, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s24, s16
+; CHECK-NEXT:    vcvtt.f16.f32 s24, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s21
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r1, s0
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s25, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s21
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r1, s0
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s17, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s25, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s22
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r1, s0
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s26, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s22
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r1, s0
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s18, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s26, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s23
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r1, s0
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s27, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s23
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r1, s0
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s19, s0
-; CHECK-NEXT:    vmov q0, q4
-; CHECK-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEXT:    vcvtt.f16.f32 s27, s0
+; CHECK-NEXT:    vmov q0, q6
+; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    pop {r7, pc}
 entry:
   %out = frem <8 x half> %in1, %in2
diff --git a/llvm/test/CodeGen/Thumb2/mve-fmath.ll b/llvm/test/CodeGen/Thumb2/mve-fmath.ll
--- a/llvm/test/CodeGen/Thumb2/mve-fmath.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fmath.ll
@@ -99,8 +99,8 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r7, lr}
 ; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .vsave {d8, d9}
-; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
 ; CHECK-NEXT:    vmov r0, s0
@@ -111,40 +111,40 @@
 ; CHECK-NEXT:    mov r0, r1
 ; CHECK-NEXT:    bl cosf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s2, s16
-; CHECK-NEXT:    vcvtt.f16.f32 s16, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
+; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl cosf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl cosf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s17, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl cosf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl cosf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s18, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl cosf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl cosf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s19, s0
-; CHECK-NEXT:    vmov q0, q4
-; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
+; CHECK-NEXT:    vmov q0, q5
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    pop {r7, pc}
 entry:
   %0 = call fast <8 x half> @llvm.cos.v8f16(<8 x half> %src)
@@ -210,8 +210,8 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r7, lr}
 ; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .vsave {d8, d9}
-; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
 ; CHECK-NEXT:    vmov r0, s0
@@ -222,40 +222,40 @@
 ; CHECK-NEXT:    mov r0, r1
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s2, s16
-; CHECK-NEXT:    vcvtt.f16.f32 s16, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
+; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s17, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s18, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s19, s0
-; CHECK-NEXT:    vmov q0, q4
-; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
+; CHECK-NEXT:    vmov q0, q5
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    pop {r7, pc}
 entry:
   %0 = call fast <8 x half> @llvm.sin.v8f16(<8 x half> %src)
@@ -321,8 +321,8 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r7, lr}
 ; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .vsave {d8, d9}
-; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
 ; CHECK-NEXT:    vmov r0, s0
@@ -333,40 +333,40 @@
 ; CHECK-NEXT:    mov r0, r1
 ; CHECK-NEXT:    bl expf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s2, s16
-; CHECK-NEXT:    vcvtt.f16.f32 s16, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
+; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl expf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl expf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s17, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl expf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl expf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s18, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl expf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl expf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s19, s0
-; CHECK-NEXT:    vmov q0, q4
-; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
+; CHECK-NEXT:    vmov q0, q5
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    pop {r7, pc}
 entry:
   %0 = call fast <8 x half> @llvm.exp.v8f16(<8 x half> %src)
@@ -432,8 +432,8 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r7, lr}
 ; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .vsave {d8, d9}
-; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
 ; CHECK-NEXT:    vmov r0, s0
@@ -444,40 +444,40 @@
 ; CHECK-NEXT:    mov r0, r1
 ; CHECK-NEXT:    bl exp2f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s2, s16
-; CHECK-NEXT:    vcvtt.f16.f32 s16, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
+; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl exp2f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl exp2f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s17, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl exp2f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl exp2f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s18, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl exp2f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl exp2f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s19, s0
-; CHECK-NEXT:    vmov q0, q4
-; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
+; CHECK-NEXT:    vmov q0, q5
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    pop {r7, pc}
 entry:
   %0 = call fast <8 x half> @llvm.exp2.v8f16(<8 x half> %src)
@@ -543,8 +543,8 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r7, lr}
 ; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .vsave {d8, d9}
-; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
 ; CHECK-NEXT:    vmov r0, s0
@@ -555,40 +555,40 @@
 ; CHECK-NEXT:    mov r0, r1
 ; CHECK-NEXT:    bl logf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s2, s16
-; CHECK-NEXT:    vcvtt.f16.f32 s16, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
+; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl logf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl logf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s17, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl logf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl logf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s18, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl logf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl logf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s19, s0
-; CHECK-NEXT:    vmov q0, q4
-; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
+; CHECK-NEXT:    vmov q0, q5
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    pop {r7, pc}
 entry:
   %0 = call fast <8 x half> @llvm.log.v8f16(<8 x half> %src)
@@ -654,8 +654,8 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r7, lr}
 ; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .vsave {d8, d9}
-; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
 ; CHECK-NEXT:    vmov r0, s0
@@ -666,40 +666,40 @@
 ; CHECK-NEXT:    mov r0, r1
 ; CHECK-NEXT:    bl log2f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s2, s16
-; CHECK-NEXT:    vcvtt.f16.f32 s16, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
+; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl log2f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl log2f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s17, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl log2f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl log2f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s18, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl log2f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl log2f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s19, s0
-; CHECK-NEXT:    vmov q0, q4
-; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
+; CHECK-NEXT:    vmov q0, q5
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    pop {r7, pc}
 entry:
   %0 = call fast <8 x half> @llvm.log2.v8f16(<8 x half> %src)
@@ -765,8 +765,8 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r7, lr}
 ; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .vsave {d8, d9}
-; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
 ; CHECK-NEXT:    vmov r0, s0
@@ -777,40 +777,40 @@
 ; CHECK-NEXT:    mov r0, r1
 ; CHECK-NEXT:    bl log10f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s2, s16
-; CHECK-NEXT:    vcvtt.f16.f32 s16, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
+; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl log10f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl log10f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s17, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl log10f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl log10f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s18, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl log10f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl log10f
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s19, s0
-; CHECK-NEXT:    vmov q0, q4
-; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
+; CHECK-NEXT:    vmov q0, q5
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    pop {r7, pc}
 entry:
   %0 = call fast <8 x half> @llvm.log10.v8f16(<8 x half> %src)
@@ -881,8 +881,8 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r7, lr}
 ; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
-; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    vmov q5, q0
 ; CHECK-NEXT:    vmov q4, q1
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s20
@@ -898,52 +898,52 @@
 ; CHECK-NEXT:    mov r0, r2
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s2, s16
-; CHECK-NEXT:    vcvtt.f16.f32 s16, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s24, s16
+; CHECK-NEXT:    vcvtt.f16.f32 s24, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s21
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r1, s0
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s25, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s21
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
 ; CHECK-NEXT:    vmov r1, s0
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s17, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s25, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s22
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r1, s0
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s26, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s22
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
 ; CHECK-NEXT:    vmov r1, s0
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s18, s0
+; CHECK-NEXT:    vcvtt.f16.f32 s26, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s23
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r1, s0
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vcvtb.f16.f32 s27, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s23
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
 ; CHECK-NEXT:    vmov r1, s0
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vcvtt.f16.f32 s19, s0
-; CHECK-NEXT:    vmov q0, q4
-; CHECK-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEXT:    vcvtt.f16.f32 s27, s0
+; CHECK-NEXT:    vmov q0, q6
+; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    pop {r7, pc}
 entry:
   %0 = call fast <8 x half> @llvm.pow.v8f16(<8 x half> %src1, <8 x half> %src2)
diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll
--- a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll
@@ -1204,7 +1204,7 @@
 ; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-LE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-LE-NEXT:    vcvtb.f16.f32 s4, s0
-; CHECK-LE-NEXT:    vcvtb.f16.f32 s6, s2
+; CHECK-LE-NEXT:    vcvtb.f16.f32 s5, s2
 ; CHECK-LE-NEXT:    vcvtt.f16.f32 s4, s1
 ; CHECK-LE-NEXT:    vcvtt.f16.f32 s5, s3
 ; CHECK-LE-NEXT:    csetm r2, gt
@@ -1263,8 +1263,8 @@
 ; CHECK-BE-NEXT:    vcvtb.f16.f32 s0, s4
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-BE-NEXT:    vcmp.f32 s6, #0
+; CHECK-BE-NEXT:    vcvtb.f16.f32 s1, s6
 ; CHECK-BE-NEXT:    vcvtt.f16.f32 s0, s5
-; CHECK-BE-NEXT:    vcvtb.f16.f32 s2, s6
 ; CHECK-BE-NEXT:    vcvtt.f16.f32 s1, s7
 ; CHECK-BE-NEXT:    csetm r2, gt
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -1328,7 +1328,7 @@
 ; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-LE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-LE-NEXT:    vcvtb.f16.f32 s4, s0
-; CHECK-LE-NEXT:    vcvtb.f16.f32 s6, s2
+; CHECK-LE-NEXT:    vcvtb.f16.f32 s5, s2
 ; CHECK-LE-NEXT:    vcvtt.f16.f32 s4, s1
 ; CHECK-LE-NEXT:    vcvtt.f16.f32 s5, s3
 ; CHECK-LE-NEXT:    csetm r2, gt
@@ -1387,8 +1387,8 @@
 ; CHECK-BE-NEXT:    vcvtb.f16.f32 s0, s4
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-BE-NEXT:    vcmp.f32 s6, #0
+; CHECK-BE-NEXT:    vcvtb.f16.f32 s1, s6
 ; CHECK-BE-NEXT:    vcvtt.f16.f32 s0, s5
-; CHECK-BE-NEXT:    vcvtb.f16.f32 s2, s6
 ; CHECK-BE-NEXT:    vcvtt.f16.f32 s1, s7
 ; CHECK-BE-NEXT:    csetm r2, gt
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
@@ -1452,7 +1452,7 @@
 ; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-LE-NEXT:    vcmp.f32 s1, #0
 ; CHECK-LE-NEXT:    vcvtb.f16.f32 s4, s0
-; CHECK-LE-NEXT:    vcvtb.f16.f32 s6, s2
+; CHECK-LE-NEXT:    vcvtb.f16.f32 s5, s2
 ; CHECK-LE-NEXT:    vcvtt.f16.f32 s4, s1
 ; CHECK-LE-NEXT:    vcvtt.f16.f32 s5, s3
 ; CHECK-LE-NEXT:    csetm r2, gt
@@ -1519,8 +1519,8 @@
 ; CHECK-BE-NEXT:    vcvtb.f16.f32 s0, s4
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-BE-NEXT:    vcmp.f32 s6, #0
+; CHECK-BE-NEXT:    vcvtb.f16.f32 s1, s6
 ; CHECK-BE-NEXT:    vcvtt.f16.f32 s0, s5
-; CHECK-BE-NEXT:    vcvtb.f16.f32 s2, s6
 ; CHECK-BE-NEXT:    vcvtt.f16.f32 s1, s7
 ; CHECK-BE-NEXT:    csetm r2, gt
 ; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll
--- a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll
@@ -354,10 +354,10 @@
 ; CHECK-MVE-LABEL: vmovn32_trunc1:
 ; CHECK-MVE:       @ %bb.0: @ %entry
 ; CHECK-MVE-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-MVE-NEXT:    vcvtb.f16.f32 s1, s1
 ; CHECK-MVE-NEXT:    vcvtb.f16.f32 s2, s2
+; CHECK-MVE-NEXT:    vcvtb.f16.f32 s3, s3
 ; CHECK-MVE-NEXT:    vcvtt.f16.f32 s0, s4
-; CHECK-MVE-NEXT:    vcvtb.f16.f32 s4, s1
-; CHECK-MVE-NEXT:    vcvtb.f16.f32 s4, s3
 ; CHECK-MVE-NEXT:    vcvtt.f16.f32 s1, s5
 ; CHECK-MVE-NEXT:    vcvtt.f16.f32 s2, s6
 ; CHECK-MVE-NEXT:    vcvtt.f16.f32 s3, s7
@@ -377,14 +377,15 @@
 define arm_aapcs_vfpcc <8 x half> @vmovn32_trunc2(<4 x float> %src1, <4 x float> %src2) {
 ; CHECK-MVE-LABEL: vmovn32_trunc2:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vcvtt.f16.f32 s0, s0
-; CHECK-MVE-NEXT:    vcvtt.f16.f32 s1, s1
-; CHECK-MVE-NEXT:    vcvtt.f16.f32 s2, s2
-; CHECK-MVE-NEXT:    vcvtt.f16.f32 s3, s3
-; CHECK-MVE-NEXT:    vcvtb.f16.f32 s4, s4
-; CHECK-MVE-NEXT:    vcvtb.f16.f32 s4, s5
-; CHECK-MVE-NEXT:    vcvtb.f16.f32 s4, s6
-; CHECK-MVE-NEXT:    vcvtb.f16.f32 s4, s7
+; CHECK-MVE-NEXT:    vmov q2, q0
+; CHECK-MVE-NEXT:    vcvtb.f16.f32 s0, s4
+; CHECK-MVE-NEXT:    vcvtb.f16.f32 s1, s5
+; CHECK-MVE-NEXT:    vcvtb.f16.f32 s2, s6
+; CHECK-MVE-NEXT:    vcvtb.f16.f32 s3, s7
+; CHECK-MVE-NEXT:    vcvtt.f16.f32 s0, s8
+; CHECK-MVE-NEXT:    vcvtt.f16.f32 s1, s9
+; CHECK-MVE-NEXT:    vcvtt.f16.f32 s2, s10
+; CHECK-MVE-NEXT:    vcvtt.f16.f32 s3, s11
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: vmovn32_trunc2:
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcvt16.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt16.ll
--- a/llvm/test/CodeGen/Thumb2/mve-vcvt16.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcvt16.ll
@@ -37,8 +37,8 @@
 ; CHECK-LABEL: fptrunc_4:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
-; CHECK-NEXT:    vcvtb.f16.f32 s2, s2
 ; CHECK-NEXT:    vcvtt.f16.f32 s0, s1
+; CHECK-NEXT:    vcvtb.f16.f32 s1, s2
 ; CHECK-NEXT:    vcvtt.f16.f32 s1, s3
 ; CHECK-NEXT:    bx lr
 entry:
@@ -50,13 +50,13 @@
 ; CHECK-LABEL: fptrunc_8:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
-; CHECK-NEXT:    vcvtb.f16.f32 s2, s2
-; CHECK-NEXT:    vcvtb.f16.f32 s2, s4
 ; CHECK-NEXT:    vcvtt.f16.f32 s0, s1
+; CHECK-NEXT:    vcvtb.f16.f32 s1, s2
+; CHECK-NEXT:    vcvtb.f16.f32 s2, s4
 ; CHECK-NEXT:    vcvtt.f16.f32 s1, s3
+; CHECK-NEXT:    vcvtb.f16.f32 s3, s6
 ; CHECK-NEXT:    vcvtt.f16.f32 s2, s5
 ; CHECK-NEXT:    vcvtt.f16.f32 s3, s7
-; CHECK-NEXT:    vcvtb.f16.f32 s4, s6
 ; CHECK-NEXT:    bx lr
 entry:
   %out = fptrunc <8 x float> %src1 to <8 x half>
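
Note on the idiom used throughout this patch: vcvtb/vcvtt with an f16 destination write only one half of the target S register, so the old contents of the register flow into the result. The patch models that partial update by threading a constraint-string parameter (cstr) through the VFP instruction classes and tying the destination to a new dummy input operand ("$Sd = $Sda"). A minimal TableGen sketch of the pattern follows; the MYCVT record name and its mnemonic are hypothetical, not from the patch, and the class arguments simply follow the updated ASuI signature:

  // Hypothetical partial-update instruction: only half of $Sd is written,
  // so the previous value is read through $Sda, tied by "$Sd = $Sda".
  def MYCVT : ASuI<0b11101, 0b11, 0b0011, 0b01, 0,
                   (outs SPR:$Sd), (ins SPR:$Sda, SPR:$Sm),
                   IIC_fpCVTHS, "mycvt", ".f16.f32\t$Sd, $Sm", "$Sd = $Sda",
                   []>;

Selection patterns must then supply the tied input explicitly: (IMPLICIT_DEF) when the untouched half is a don't-care, or an EXTRACT_SUBREG of the vector being updated when it must be preserved, which is what the revised FP16Pat/FullFP16Pat patterns above do.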