Index: lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- lib/Target/AArch64/AArch64InstrFormats.td
+++ lib/Target/AArch64/AArch64InstrFormats.td
@@ -9377,7 +9377,8 @@
 class BaseCAS<string order, string size, RegisterClass RC>
   : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn),
                     "cas" # order # size, "\t$Rs, $Rt, [$Rn]",
-                    "$out = $Rs",[]> {
+                    "$out = $Rs",[]>,
+    Sched<[WriteAtomic]> {
   let NP = 1;
 }
 
@@ -9391,7 +9392,8 @@
 class BaseCASP<string order, string size, RegisterClass RC>
   : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn),
                     "casp" # order # size, "\t$Rs, $Rt, [$Rn]",
-                    "$out = $Rs",[]> {
+                    "$out = $Rs",[]>,
+    Sched<[WriteAtomic]> {
   let NP = 0;
 }
 
@@ -9405,7 +9407,8 @@
 let Predicates = [HasV8_1a] in
 class BaseSWP<string order, string size, RegisterClass RC>
     : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swp" # order # size,
-        "\t$Rs, $Rt, [$Rn]","",[]> {
+        "\t$Rs, $Rt, [$Rn]","",[]>,
+      Sched<[WriteAtomic]> {
   bits<2> Sz;
   bit Acq;
   bit Rel;
@@ -9436,7 +9439,8 @@
 let Predicates = [HasV8_1a], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
 class BaseLDOPregister<string op, string order, string size, RegisterClass RC>
     : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ld" # op # order # size,
-        "\t$Rs, $Rt, [$Rn]","",[]> {
+        "\t$Rs, $Rt, [$Rn]","",[]>,
+      Sched<[WriteAtomic]> {
   bits<2> Sz;
   bit Acq;
   bit Rel;
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -312,10 +312,13 @@
 //===----------------------------------------------------------------------===//
 
 let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
+// We set Sched to empty list because we expect these instructions to simply get
+// removed in most cases.
 def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
-                              [(AArch64callseq_start timm:$amt)]>;
+                              [(AArch64callseq_start timm:$amt)]>, Sched<[]>;
 def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
-                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>;
+                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
+                            Sched<[]>;
 } // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1
 
 let isReMaterializable = 1, isCodeGenOnly = 1 in {
@@ -1206,7 +1209,8 @@
 // Create a separate pseudo-instruction for codegen to use so that we don't
 // flag lr as used in every function. It'll be restored before the RET by the
 // epilogue if it's legitimately used.
-def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]> {
+def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]>,
+                   Sched<[WriteBrReg]> {
   let isTerminator = 1;
   let isBarrier = 1;
   let isReturn = 1;
@@ -1216,7 +1220,7 @@
 // R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
 // (which in the usual case is a BLR).
 let hasSideEffects = 1 in
-def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> {
+def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
   let AsmString = ".tlsdesccall $sym";
 }
 
@@ -1226,7 +1230,8 @@
     isCodeGenOnly = 1 in
 def TLSDESC_CALLSEQ
     : Pseudo<(outs), (ins i64imm:$sym),
-             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>;
+             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
+      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
 def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
           (TLSDESC_CALLSEQ texternalsym:$sym)>;
 
@@ -2536,9 +2541,11 @@
 let isReMaterializable = 1, isCodeGenOnly = 1 in {
 def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
              PseudoInstExpansion<(FMOVWSr FPR32:$Rd, WZR)>,
+             Sched<[WriteF]>,
              Requires<[NoZCZ]>;
 def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
              PseudoInstExpansion<(FMOVXDr FPR64:$Rd, XZR)>,
+             Sched<[WriteF]>,
              Requires<[NoZCZ]>;
 }
 
@@ -2665,6 +2672,7 @@
                                       (i32 imm:$cond), NZCV))]> {
   let Uses = [NZCV];
   let usesCustomInserter = 1;
+  let hasNoSchedulingInfo = 1;
 }
 
@@ -6038,8 +6046,10 @@
 // Tail call return handling. These are all compiler pseudo-instructions,
 // so no encoding information or anything like that.
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
-  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff),[]>;
-  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>;
+  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
+                   Sched<[WriteBrReg]>;
+  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
+                   Sched<[WriteBrReg]>;
 }
 
 def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
Index: lib/Target/AArch64/AArch64SchedA53.td
===================================================================
--- lib/Target/AArch64/AArch64SchedA53.td
+++ lib/Target/AArch64/AArch64SchedA53.td
@@ -109,6 +109,8 @@
 def A53WriteVST3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6;
                                                   let ResourceCycles = [3]; }
 
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
 // Branch
 def : WriteRes<WriteBr, [A53UnitB]>;
 def : WriteRes<WriteBrReg, [A53UnitB]>;
Index: lib/Target/AArch64/AArch64SchedA57.td
===================================================================
--- lib/Target/AArch64/AArch64SchedA57.td
+++ lib/Target/AArch64/AArch64SchedA57.td
@@ -96,6 +96,8 @@
 def : SchedAlias<WriteVLD, A57Write_5cyc_1L>;
 def : SchedAlias<WriteVST, A57Write_1cyc_1S>;
 
+def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
+
 def : WriteRes<WriteSys,     []> { let Latency = 1; }
 def : WriteRes<WriteBarrier, []> { let Latency = 1; }
 def : WriteRes<WriteHint,    []> { let Latency = 1; }
Index: lib/Target/AArch64/AArch64SchedCyclone.td
===================================================================
--- lib/Target/AArch64/AArch64SchedCyclone.td
+++ lib/Target/AArch64/AArch64SchedCyclone.td
@@ -17,6 +17,7 @@
   let MicroOpBufferSize = 192; // Based on the reorder buffer.
   let LoadLatency = 4; // Optimistic load latency.
   let MispredictPenalty = 16; // 14-19 cycles are typical.
+  let CompleteModel = 1;
 }
 
 //===----------------------------------------------------------------------===//
@@ -726,7 +728,7 @@
 def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
              (instrs LD3Rv1d,LD3Rv2d)>;
 def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
-             (instrs LD3Rv2d_POST,LD3Rv2d_POST)>;
+             (instrs LD3Rv1d_POST,LD3Rv2d_POST)>;
 
 def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
              (instregex "LD4Fourv(8b|4h|2s)$")>;
@@ -851,6 +853,9 @@
 def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST4i64)>;
 def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>;
 
+// Atomic operations are not supported.
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
 //---
 // Unused SchedRead types
 //---
Index: lib/Target/AArch64/AArch64SchedKryo.td
===================================================================
--- lib/Target/AArch64/AArch64SchedKryo.td
+++ lib/Target/AArch64/AArch64SchedKryo.td
@@ -99,6 +99,8 @@
 def : WriteRes<WriteLDHi, []> { let Latency = 4; }
 
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
 // No forwarding logic is modelled yet.
 def : ReadAdvance<ReadI, 0>;
 def : ReadAdvance<ReadISReg, 0>;
Index: lib/Target/AArch64/AArch64SchedM1.td
===================================================================
--- lib/Target/AArch64/AArch64SchedM1.td
+++ lib/Target/AArch64/AArch64SchedM1.td
@@ -356,4 +356,7 @@
 // CRC instructions.
 def : InstRW<[M1WriteC2], (instregex "^CRC32")>;
 
+// Atomic memory operations.
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
 } // SchedModel = ExynosM1Model
Index: lib/Target/AArch64/AArch64Schedule.td
===================================================================
--- lib/Target/AArch64/AArch64Schedule.td
+++ lib/Target/AArch64/AArch64Schedule.td
@@ -92,6 +92,8 @@
 def WriteVLD : SchedWrite; // Vector loads.
 def WriteVST : SchedWrite; // Vector stores.
 
+def WriteAtomic : SchedWrite; // Atomic memory operations (CAS, Swap, LDOP)
+
 // Read the unwritten lanes of the VLD's destination registers.
 def ReadVLD : SchedRead;