Index: lib/Target/ARM/ARMInstrVFP.td =================================================================== --- lib/Target/ARM/ARMInstrVFP.td +++ lib/Target/ARM/ARMInstrVFP.td @@ -336,13 +336,15 @@ def VADDD : ADbI<0b11100, 0b11, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>; + [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>, + Sched<[WriteFPALU64]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in def VADDS : ASbIn<0b11100, 0b11, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> { + [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]>, + Sched<[WriteFPALU32]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -358,13 +360,15 @@ def VSUBD : ADbI<0b11100, 0b11, 1, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>; + [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>, + Sched<[WriteFPALU64]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in def VSUBS : ASbIn<0b11100, 0b11, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> { + [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]>, + Sched<[WriteFPALU32]>{ // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -380,31 +384,36 @@ def VDIVD : ADbI<0b11101, 0b00, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>; + [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>, + Sched<[WriteFPDIV64]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in def VDIVS : ASbI<0b11101, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>; + [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>, + Sched<[WriteFPDIV32]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in def VDIVH : AHbI<0b11101, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm", - []>; + []>, + Sched<[WriteFPDIV32]>; let TwoOperandAliasConstraint = "$Dn = $Dd" in def VMULD : ADbI<0b11100, 0b10, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>; + [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>, + Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in def VMULS : ASbIn<0b11100, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> { + [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]>, + Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -414,17 +423,20 @@ def VMULH : AHbI<0b11100, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm", - []>; + []>, + Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>; def VNMULD : ADbI<0b11100, 0b10, 1, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>; + [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>, + Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>; def VNMULS : ASbI<0b11100, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> { + [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]>, + Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -433,7 +445,8 @@ def VNMULH : AHbI<0b11100, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm", - []>; + []>, + Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>; multiclass vsel_inst opc, int CC> { let DecoderNamespace = "VFPV8", PostEncoderMethod = "", @@ -624,7 +637,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm", - [(set DPR:$Dd, (fpextend SPR:$Sm))]> { + [(set DPR:$Dd, (fpextend SPR:$Sm))]>, + Sched<[WriteFPCVT]> { // Instruction operands. bits<5> Dd; bits<5> Sm; @@ -641,7 +655,8 @@ // Special case encoding: bits 11-8 is 0b1011. def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (fpround DPR:$Dm))]> { + [(set SPR:$Sd, (fpround DPR:$Dm))]>, + Sched<[WriteFPCVT]> { // Instruction operands. 
bits<5> Sd; bits<5> Dm; @@ -667,27 +682,32 @@ def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>, - Requires<[HasFP16]>; + Requires<[HasFP16]>, + Sched<[WriteFPCVT]>; def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>, - Requires<[HasFP16]>; + Requires<[HasFP16]>, + Sched<[WriteFPCVT]>; def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>, - Requires<[HasFP16]>; + Requires<[HasFP16]>, + Sched<[WriteFPCVT]>; def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>, - Requires<[HasFP16]>; + Requires<[HasFP16]>, + Sched<[WriteFPCVT]>; def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs DPR:$Dd), (ins SPR:$Sm), NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", - []>, Requires<[HasFPARMv8, HasDPVFP]> { + []>, Requires<[HasFPARMv8, HasDPVFP]>, + Sched<[WriteFPCVT]> { // Instruction operands. 
bits<5> Sm; @@ -946,12 +966,14 @@ def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", - [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>; + [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>, + Sched<[WriteFPSQRT64]>; def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", - [(set SPR:$Sd, (fsqrt SPR:$Sm))]>; + [(set SPR:$Sd, (fsqrt SPR:$Sm))]>, + Sched<[WriteFPSQRT32]>; def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), @@ -987,7 +1009,8 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$Rt), (ins SPR:$Sn), IIC_fpMOVSI, "vmov", "\t$Rt, $Sn", - [(set GPR:$Rt, (bitconvert SPR:$Sn))]> { + [(set GPR:$Rt, (bitconvert SPR:$Sn))]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<4> Rt; bits<5> Sn; @@ -1010,7 +1033,8 @@ (outs SPR:$Sn), (ins GPR:$Rt), IIC_fpMOVIS, "vmov", "\t$Sn, $Rt", [(set SPR:$Sn, (bitconvert GPR:$Rt))]>, - Requires<[HasVFP2, UseVMOVSR]> { + Requires<[HasVFP2, UseVMOVSR]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<5> Sn; bits<4> Rt; @@ -1032,7 +1056,8 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm), IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm", - [/* FIXME: Can't write pattern for multiple result instr*/]> { + [/* FIXME: Can't write pattern for multiple result instr*/]>, + Sched<[WriteFPMOV]> { // Instruction operands. 
bits<5> Dm; bits<4> Rt; @@ -1059,7 +1084,8 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, (outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2), IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2", - [/* For disassembly only; pattern left blank */]> { + [/* For disassembly only; pattern left blank */]>, + Sched<[WriteFPMOV]> { bits<5> src1; bits<4> Rt; bits<4> Rt2; @@ -1085,7 +1111,8 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011, (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2), IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2", - [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> { + [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<5> Dm; bits<4> Rt; @@ -1128,7 +1155,8 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010, (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", - [/* For disassembly only; pattern left blank */]> { + [/* For disassembly only; pattern left blank */]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<5> dst1; bits<4> src1; @@ -1154,7 +1182,8 @@ (outs GPR:$Rt), (ins SPR:$Sn), IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn", []>, - Requires<[HasFullFP16]> { + Requires<[HasFullFP16]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<4> Rt; bits<5> Sn; @@ -1173,7 +1202,8 @@ (outs SPR:$Sn), (ins GPR:$Rt), IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt", []>, - Requires<[HasFullFP16]> { + Requires<[HasFullFP16]>, + Sched<[WriteFPMOV]> { // Instruction operands. 
bits<5> Sn; bits<4> Rt; @@ -1254,7 +1284,8 @@ def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // s32 } @@ -1269,7 +1300,8 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, (outs SPR:$Sd),(ins SPR:$Sm), IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // s32 // Some single precision VFP instructions may be executed on both NEON and @@ -1286,14 +1318,16 @@ def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTIH, "vcvt", ".f16.s32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // s32 } def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // u32 } @@ -1308,7 +1342,8 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // u32 // Some single precision VFP instructions may be executed on both NEON and @@ -1325,7 +1360,8 @@ def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTIH, "vcvt", ".f16.u32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // u32 } @@ -1390,7 +1426,8 @@ def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // Z bit } @@ -1405,7 +1442,8 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // Z bit // Some single precision VFP instructions may be executed 
on both NEON and @@ -1423,14 +1461,16 @@ def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTHI, "vcvt", ".s32.f16\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // Z bit } def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // Z bit } @@ -1445,7 +1485,8 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // Z bit // Some single precision VFP instructions may be executed on both NEON and @@ -1463,7 +1504,8 @@ def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTHI, "vcvt", ".u32.f16\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // Z bit } @@ -1473,42 +1515,48 @@ def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>{ + [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm", - [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> { + [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } def VTOSIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTHI, "vcvtr", ".s32.f16\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{ + [(set SPR:$Sd, 
(int_arm_vcvtru(f64 DPR:$Dm)))]>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm", - [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> { + [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } def VTOUIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTHI, "vcvtr", ".u32.f16\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } } @@ -1528,8 +1576,7 @@ class AVConv1XInsS_Encode op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> - : AVConv1XI, - Sched<[WriteCvtFP]> { + : AVConv1XI { bits<5> dst; // if dp_operation then UInt(D:Vd) else UInt(Vd:D); let Inst{22} = dst{0}; @@ -1540,8 +1587,7 @@ class AVConv1XInsD_Encode op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> - : AVConv1XI, - Sched<[WriteCvtFP]> { + : AVConv1XI { bits<5> dst; // if dp_operation then UInt(D:Vd) else UInt(Vd:D); let Inst{22} = dst{4}; @@ -1553,26 +1599,31 @@ def VTOSHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VTOUHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTHI, "vcvt", ".u16.f16\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VTOSLH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTHI, "vcvt", ".s32.f16\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VTOULH 
: AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTHI, "vcvt", ".u32.f16\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), - IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> { + IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1604,45 +1655,54 @@ def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), - IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>; + IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), - IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>; + IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), - IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>; + IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), - IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>; + IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; // Fixed-Point to FP: def VSHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VUHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 0, (outs 
SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTIH, "vcvt", ".f16.u16\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VSLTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTIH, "vcvt", ".f16.s32\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VULTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTIH, "vcvt", ".f16.u32\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> { + IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1650,7 +1710,8 @@ def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> { + IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1658,7 +1719,8 @@ def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> { + IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -1666,7 +1728,8 @@ def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> { + IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1674,19 +1737,23 @@ def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), - IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>; + IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), - IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>; + IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), - IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>; + IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), - IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>; + IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; } // End of 'let Constraints = "$a = $dst" in' @@ -1700,7 +1767,8 @@ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VMLAS : ASbIn<0b11100, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1708,7 +1776,8 @@ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, 
RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1734,7 +1803,8 @@ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VMLSS : ASbIn<0b11100, 0b00, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1742,7 +1812,8 @@ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1768,7 +1839,8 @@ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VNMLAS : ASbI<0b11100, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1776,7 +1848,8 @@ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -1802,14 +1875,16 @@ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VNMLSS : ASbI<0b11100, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1838,7 +1913,8 @@ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VFMAS : ASbIn<0b11101, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1846,7 +1922,8 @@ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. 
} @@ -1856,7 +1933,8 @@ IIC_fpFMAC16, "vfma", ".f16\t$Sd, $Sn, $Sm", []>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFusedMAC]>; + Requires<[HasFullFP16,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, Index: lib/Target/ARM/ARMSchedule.td =================================================================== --- lib/Target/ARM/ARMSchedule.td +++ lib/Target/ARM/ARMSchedule.td @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Instruction scheduling annotations for out-of-order CPUs. +// Instruction scheduling annotations for in-order and out-of-order CPUs. // These annotations are independent of the itinerary class defined below. // Here we define the subtarget independent read/write per-operand resources. // The subtarget schedule definitions will then map these to the subtarget's @@ -54,6 +54,9 @@ // } // def : ReadAdvance; +//===----------------------------------------------------------------------===// +// Sched definitions for integer pipeline instructions +// // Basic ALU operation. def WriteALU : SchedWrite; def ReadALU : SchedRead; @@ -81,12 +84,38 @@ def WriteBrL : SchedWrite; def WriteBrTbl : SchedWrite; -// Fixpoint conversions. -def WriteCvtFP : SchedWrite; - // Noop. 
def WriteNoop : SchedWrite; +//===----------------------------------------------------------------------===// +// Sched definitions for floating-point and neon instructions +// +// Floating point conversions +def WriteFPCVT : SchedWrite; +def WriteFPMOV : SchedWrite; // FP -> GPR and vice-versa + +// ALU operations (32/64-bit) +def WriteFPALU32 : SchedWrite; +def WriteFPALU64 : SchedWrite; + +// Multiplication +def WriteFPMUL32 : SchedWrite; +def WriteFPMUL64 : SchedWrite; +def ReadFPMUL : SchedRead; // multiplier read +def ReadFPMAC : SchedRead; // accumulator read + +// Multiply-accumulate +def WriteFPMAC32 : SchedWrite; +def WriteFPMAC64 : SchedWrite; + +// Division +def WriteFPDIV32 : SchedWrite; +def WriteFPDIV64 : SchedWrite; + +// Square-root +def WriteFPSQRT32 : SchedWrite; +def WriteFPSQRT64 : SchedWrite; + // Define TII for use in SchedVariant Predicates. def : PredicateProlog<[{ const ARMBaseInstrInfo *TII = Index: lib/Target/ARM/ARMScheduleA9.td =================================================================== --- lib/Target/ARM/ARMScheduleA9.td +++ lib/Target/ARM/ARMScheduleA9.td @@ -1945,6 +1945,11 @@ def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4; let NumMicroOps = 0; } +def : WriteRes { let Latency = 15; } +def : WriteRes { let Latency = 25; } +def : WriteRes { let Latency = 17; } +def : WriteRes { let Latency = 32; } + // Floating-point // Only one FP or AGU instruction may issue per cycle. We model this // by having FP instructions consume the AGU resource. @@ -2471,6 +2476,28 @@ def : SchedAlias; def : SchedAlias; def : SchedAlias; + +// ===---------------------------------------------------------------------===// +// Floating-point. 
Map target defined SchedReadWrite to processor specific ones +// +def : WriteRes { let Latency = 4; } +def : SchedAlias; + +def : SchedAlias; +def : SchedAlias; + +def : SchedAlias; +def : SchedAlias; + +def : SchedAlias; +def : SchedAlias; + +def : ReadAdvance; +def : ReadAdvance; + +// ===---------------------------------------------------------------------===// +// Subtarget-specific overrides. Map opcodes to list of SchedReadWrite types. +// def : InstRW< [WriteALU], (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr", "BICrr")>; @@ -2524,6 +2551,6 @@ def : WriteRes; def : WriteRes; def : WriteRes; -def : SchedAlias; +//def : SchedAlias; def : WriteRes { let Latency = 0; let NumMicroOps = 0; } } // SchedModel = CortexA9Model Index: lib/Target/ARM/ARMScheduleR52.td =================================================================== --- lib/Target/ARM/ARMScheduleR52.td +++ lib/Target/ARM/ARMScheduleR52.td @@ -86,12 +86,45 @@ // Misc def : WriteRes { let Latency = 0; let NumMicroOps = 0; } -def : WriteRes { let Latency = 3; } +// Integer pipeline by-passes def : ReadAdvance; // Operand needed in EX1 stage def : ReadAdvance; // Shift operands needed in ISS +// Floating-point. Map target-defined SchedReadWrites to subtarget +def : WriteRes { let Latency = 6; } + +def : WriteRes { + let Latency = 6; +} + +def : WriteRes { + let Latency = 11; // as it is internally two insns (MUL then ADD) +} + +def : WriteRes { + let Latency = 11; +} + +def : WriteRes { + let Latency = 7; // FP div takes fixed #cycles + let ResourceCycles = [7]; // is not pipelined +} + +def : WriteRes { + let Latency = 17; + let ResourceCycles = [17]; +} + +def : WriteRes { let Latency = 7; } +def : WriteRes { let Latency = 17; } + +def : ReadAdvance; // mul operand read in F1 +def : ReadAdvance; // fp-mac operand read in F1 + + //===----------------------------------------------------------------------===// // Subtarget-specific SchedReadWrites. 
@@ -147,19 +180,17 @@ def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; } def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; } -def R52WriteFPDIV_SP : SchedWriteRes<[R52UnitFPDIV]> { - let Latency = 7; // FP div takes fixed #cycles - let ResourceCycles = [7]; // is not pipelined - } -def R52WriteFPDIV_DP : SchedWriteRes<[R52UnitFPDIV]> { - let Latency = 17; - let ResourceCycles = [17]; -} - - //===----------------------------------------------------------------------===// -// Subtarget-specific - map operands to SchedReadWrites +// Floating-point. Map target defined SchedReadWrites to processor specific ones +// +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +//===----------------------------------------------------------------------===// +// Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types. +// def : InstRW<[WriteALU], (instrs COPY)>; def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], @@ -492,12 +523,6 @@ def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>; def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>; -def : InstRW<[R52WriteFPDIV_SP, R52Read_F0, R52Read_F0], (instregex "VDIV(S|H)")>; -def : InstRW<[R52WriteFPDIV_DP, R52Read_F0, R52Read_F0], (instregex "VDIVD")>; - -def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], - (instregex "(VFMA|VFMS|VFNMA|VFNMS)(D|H|S)")>; - def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>; def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>; @@ -777,9 +802,8 @@ def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>; def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>; -def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VMOV", "VORR", "VORN", "VREV")>; +def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], 
(instregex "VORR", "VORN", "VREV")>; def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>; -def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VMUL", "VNMUL", "VMLA")>; def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>; def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>; def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>; Index: lib/Target/ARM/ARMScheduleSwift.td =================================================================== --- lib/Target/ARM/ARMScheduleSwift.td +++ lib/Target/ARM/ARMScheduleSwift.td @@ -304,6 +304,7 @@ def : InstRW <[SwiftDiv], (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>; + // 4.2.19 Integer Load Single Element // 4.2.20 Integer Load Signextended def SwiftWriteP2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> { @@ -597,8 +598,6 @@ def : InstRW<[SwiftWriteP1FourCycle], (instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH", "VMULL", "VQDMULL")>; - def : InstRW<[SwiftWriteP1SixCycle], - (instregex "VMULD", "VNMULD")>; def : InstRW<[SwiftWriteP1FourCycle], (instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)", "VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>; @@ -608,7 +607,7 @@ // 4.2.36 Advanced SIMD and VFP, Convert def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>; // Fixpoint conversions. - def : WriteRes { let Latency = 4; } + //def : WriteRes { let Latency = 4; } // 4.2.37 Advanced SIMD and VFP, Move def : InstRW<[SwiftWriteP0TwoCycle], @@ -1036,6 +1035,30 @@ def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>; def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>; + // ===---------------------------------------------------------------------===// + // Floating-point. 
Map target defined SchedReadWrite to processor specific ones + // + def : SchedAlias; + def : SchedAlias; + + def : SchedAlias; + def : SchedAlias; + + def : SchedAlias; + def : SchedAlias; + + def : SchedAlias; + def : SchedAlias; + + def : SchedAlias; + def : SchedAlias; + + def : SchedAlias; + def : SchedAlias; + + def : ReadAdvance; + def : ReadAdvance; + // Not specified. def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>; // Preload. Index: test/CodeGen/ARM/misched-fp-basic.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/misched-fp-basic.ll @@ -0,0 +1,69 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a9 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \ +; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=swift -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \ +; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-r52 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \ +; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_R52 +; +; Check the latency of instructions for processors with sched-models +; +; Function Attrs: norecurse nounwind readnone +define i32 @foo(float %a, float %b, float %c, i32 %d) local_unnamed_addr #0 { +entry: +; +; CHECK: ********** MI Scheduling ********** +; CHECK_A9: VADDS +; CHECK_SWIFT: VADDfd +; CHECK_R52: VADDS +; CHECK_A9: Latency : 5 +; CHECK_SWIFT: Latency : 4 +; CHECK_R52: Latency : 6 +; +; CHECK_A9: VMULS +; CHECK_SWIFT: VMULfd +; CHECK_R52: VMULS +; CHECK_SWIFT: Latency : 4 +; CHECK_A9: Latency : 6 +; CHECK_R52: Latency : 6 +; +; CHECK: VDIVS +; CHECK_SWIFT: Latency : 17 +; CHECK_A9: Latency : 16 +; CHECK_R52: Latency : 7 +; +; CHECK: VCVTDS +; CHECK_SWIFT: Latency : 4 +; CHECK_A9: Latency : 5 +; CHECK_R52: Latency : 6 +; +; CHECK: 
VADDD +; CHECK_SWIFT: Latency : 6 +; CHECK_A9: Latency : 5 +; CHECK_R52: Latency : 6 +; +; CHECK: VMULD +; CHECK_SWIFT: Latency : 6 +; CHECK_A9: Latency : 7 +; CHECK_R52: Latency : 6 +; +; CHECK: VDIVD +; CHECK_SWIFT: Latency : 32 +; CHECK_A9: Latency : 26 +; CHECK_R52: Latency : 17 +; +; CHECK: VTOSIZD +; CHECK_SWIFT: Latency : 4 +; CHECK_A9: Latency : 5 +; CHECK_R52: Latency : 6 +; + %add = fadd float %a, %b + %mul = fmul float %add, %add + %div = fdiv float %mul, %b + %conv1 = fpext float %div to double + %add3 = fadd double %conv1, %conv1 + %mul4 = fmul double %add3, %add3 + %div5 = fdiv double %mul4, %conv1 + %conv6 = fptosi double %div5 to i32 + ret i32 %conv6 +}