Index: lib/Target/ARM64/ARM64InstrInfo.h =================================================================== --- lib/Target/ARM64/ARM64InstrInfo.h +++ lib/Target/ARM64/ARM64InstrInfo.h @@ -56,8 +56,13 @@ unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const override; - /// \brief Is there a non-zero immediate? - bool hasNonZeroImm(const MachineInstr *MI) const; + /// Returns true if there is a shiftable register and that the shift value + /// is non-zero. + bool hasShiftedReg(const MachineInstr *MI) const; + + /// Returns true if there is an extendable register and that the extending value + /// is non-zero. + bool hasExtendedReg(const MachineInstr *MI) const; /// \brief Does this instruction set its full destination register to zero? bool isGPRZero(const MachineInstr *MI) const; Index: lib/Target/ARM64/ARM64InstrInfo.cpp =================================================================== --- lib/Target/ARM64/ARM64InstrInfo.cpp +++ lib/Target/ARM64/ARM64InstrInfo.cpp @@ -841,10 +841,73 @@ } /// Return true if this is this instruction has a non-zero immediate -bool ARM64InstrInfo::hasNonZeroImm(const MachineInstr *MI) const { - if (MI->getOperand(3).isImm()) { - unsigned val = MI->getOperand(3).getImm(); - return (val != 0); +bool ARM64InstrInfo::hasShiftedReg(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case ARM64::ADDSWrs: + case ARM64::ADDSXrs: + case ARM64::ADDWrs: + case ARM64::ADDXrs: + case ARM64::ANDSWrs: + case ARM64::ANDSXrs: + case ARM64::ANDWrs: + case ARM64::ANDXrs: + case ARM64::BICSWrs: + case ARM64::BICSXrs: + case ARM64::BICWrs: + case ARM64::BICXrs: + case ARM64::CRC32Brr: + case ARM64::CRC32CBrr: + case ARM64::CRC32CHrr: + case ARM64::CRC32CWrr: + case ARM64::CRC32CXrr: + case ARM64::CRC32Hrr: + case ARM64::CRC32Wrr: + case ARM64::CRC32Xrr: + case ARM64::EONWrs: + case ARM64::EONXrs: + case ARM64::EORWrs: + case ARM64::EORXrs: + case ARM64::ORNWrs: + case ARM64::ORNXrs: + case ARM64::ORRWrs: + case ARM64::ORRXrs: + case ARM64::SUBSWrs: + case ARM64::SUBSXrs: + case ARM64::SUBWrs: + case ARM64::SUBXrs: + if (MI->getOperand(3).isImm()) { + unsigned val = MI->getOperand(3).getImm(); + return (val != 0); + } + break; + default: + break; + } + return false; +} + +/// Return true if this is this instruction has a non-zero immediate +bool ARM64InstrInfo::hasExtendedReg(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case ARM64::ADDSWrx: + case ARM64::ADDSXrx: + case ARM64::ADDSXrx64: + case ARM64::ADDWrx: + case ARM64::ADDXrx: + case ARM64::ADDXrx64: + case ARM64::SUBSWrx: + case ARM64::SUBSXrx: + case ARM64::SUBSXrx64: + case ARM64::SUBWrx: + case ARM64::SUBXrx: + case ARM64::SUBXrx64: + if (MI->getOperand(3).isImm()) { + unsigned val = MI->getOperand(3).getImm(); + return (val != 0); + } + break; + default: + break; } return false; Index: lib/Target/ARM64/ARM64SchedA53.td =================================================================== --- lib/Target/ARM64/ARM64SchedA53.td +++ lib/Target/ARM64/ARM64SchedA53.td @@ -148,7 +148,9 @@ // ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable // operands are needed one cycle later if and only if they are to be -// shifted. Otherwise, they too are needed two cycle later. +// shifted. Otherwise, they too are needed two cycle later. This same +// ReadAdvance applies to Extended registers as well, even though there is +// a seperate SchedPredicate for them. def : ReadAdvance; def A53ReadIEReg : SchedReadVariant<[ - SchedVar, + SchedVar, SchedVar]>; def : SchedAlias; @@ -197,63 +199,82 @@ def : InstRW<[WriteI], (instrs COPY)>; //--- -// Vector Mul with Accumulate -//--- -//def : InstRW<[WriteIM32, A53ReadIMA], (instregex "^M(ADD|SUB)W.*")>; -//def : InstRW<[WriteIM64, A53ReadIMA], (instregex "^M(ADD|SUB)X.*")>; - -//--- // Vector Loads //--- -def : InstRW<[A53WriteVLD1], (instregex "LD1i(8|16|32|64)(_POST)?$")>; -def : InstRW<[A53WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; -def : InstRW<[A53WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; -def : InstRW<[A53WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; -def : InstRW<[A53WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; -def : InstRW<[A53WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; - -def : InstRW<[A53WriteVLD1], (instregex "LD2i(8|16|32|64)(_POST)?$")>; -def : InstRW<[A53WriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; -def : InstRW<[A53WriteVLD2], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>; -def : InstRW<[A53WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>; - -def : InstRW<[A53WriteVLD2], (instregex "LD3i(8|16|32|64)(_POST)?$")>; -def : InstRW<[A53WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2dq)(_POST)?$")>; -def : InstRW<[A53WriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)(_POST)?$")>; -def : InstRW<[A53WriteVLD3], (instregex "LD3Threev(2d)(_POST)?$")>; - -def : InstRW<[A53WriteVLD2], (instregex "LD4i(8|16|32|64)(_POST)?$")>; -def : InstRW<[A53WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; -def : InstRW<[A53WriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)(_POST)?$")>; -def : InstRW<[A53WriteVLD4], (instregex "LD4Fourv(2d)(_POST)?$")>; - -def : InstRW<[A53WriteVLD1, A53WriteVLD1], (instregex "LDN?PS.*$")>; -def : InstRW<[A53WriteVLD2, A53WriteVLD2], (instregex "LDN?PD.*$")>; -def : InstRW<[A53WriteVLD4, A53WriteVLD4], (instregex "LDN?PQ.*$")>; +def : InstRW<[A53WriteVLD1], (instregex "LD1i(8|16|32|64)$")>; +def : InstRW<[A53WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A53WriteVLD1], (instregex "LD2i(8|16|32|64)$")>; +def : InstRW<[A53WriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[A53WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>; +def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>; + +def : InstRW<[A53WriteVLD2], (instregex "LD3i(8|16|32|64)$")>; +def : InstRW<[A53WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[A53WriteVLD3], (instregex "LD3Threev(2d)$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD3Threev(2d)_POST$")>; + +def : InstRW<[A53WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; +def : InstRW<[A53WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[A53WriteVLD4], (instregex "LD4Fourv(2d)$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; //--- // Vector Stores //--- -def : InstRW<[A53WriteVST1], (instregex "ST1i(8|16|32|64)(_POST)?$")>; -def : InstRW<[A53WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; -def : InstRW<[A53WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; -def : InstRW<[A53WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; -def : InstRW<[A53WriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; - -def : InstRW<[A53WriteVST1], (instregex "ST2i(8|16|32|64)(_POST)?$")>; -def : InstRW<[A53WriteVST1], (instregex "ST2Twov(8b|4h|2s)(_POST)?$")>; -def : InstRW<[A53WriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)(_POST)?$")>; - -def : InstRW<[A53WriteVST2], (instregex "ST3i(8|16|32|64)(_POST)?$")>; -def : InstRW<[A53WriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)(_POST)?$")>; -def : InstRW<[A53WriteVST2], (instregex "ST3Threev(2d)(_POST)?$")>; - -def : InstRW<[A53WriteVST2], (instregex "ST4i(8|16|32|64)(_POST)?$")>; -def : InstRW<[A53WriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)(_POST)?$")>; -def : InstRW<[A53WriteVST2], (instregex "ST4Fourv(2d)(_POST)?$")>; - -def : InstRW<[A53WriteVST1], (instregex "STN?P(S|D).*$")>; -def : InstRW<[A53WriteVST2], (instregex "STN?PQ.*$")>; +def : InstRW<[A53WriteVST1], (instregex "ST1i(8|16|32|64)$")>; +def : InstRW<[A53WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A53WriteVST1], (instregex "ST2i(8|16|32|64)$")>; +def : InstRW<[A53WriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A53WriteVST2], (instregex "ST3i(8|16|32|64)$")>; +def : InstRW<[A53WriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST3Threev(2d)$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; + +def : InstRW<[A53WriteVST2], (instregex "ST4i(8|16|32|64)$")>; +def : InstRW<[A53WriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST4Fourv(2d)$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; //--- // Floating Point MAC, DIV, SQRT Index: lib/Target/ARM64/ARM64Schedule.td =================================================================== --- lib/Target/ARM64/ARM64Schedule.td +++ lib/Target/ARM64/ARM64Schedule.td @@ -51,7 +51,10 @@ def ReadAdrBase : SchedRead; // Read the base resister of a reg-offset LD/ST. // Predicate for determining when a shiftable register is shifted. -def RegShiftedPred : SchedPredicate<[{TII->hasNonZeroImm(MI)}]>; +def RegShiftedPred : SchedPredicate<[{TII->hasShiftedReg(MI)}]>; + +// Predicate for determining when a extendedable register is extended. +def RegExtendedPred : SchedPredicate<[{TII->hasExtendedReg(MI)}]>; // ScaledIdxPred is true if a WriteLDIdx operand will be // scaled. Subtargets can use this to dynamically select resources and Index: test/CodeGen/ARM64/misched-basic-A53.ll =================================================================== --- test/CodeGen/ARM64/misched-basic-A53.ll +++ test/CodeGen/ARM64/misched-basic-A53.ll @@ -108,3 +108,18 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } + + +; Regression Test for Bug 19761 +; - [ARM64] Cortex-a53 schedule mode can't handle NEON post-increment load +; - http://llvm.org/bugs/show_bug.cgi?id=19761 +; +; Nothing explicit to check other than llc not crashing. +define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) { + %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8* %A) + %tmp = getelementptr i8* %A, i32 32 + store i8* %tmp, i8** %ptr + ret { <16 x i8>, <16 x i8> } %ld2 +} + +declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8*)