Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -15,6 +15,7 @@ #include "AArch64.h" #include "AArch64RegisterInfo.h" +#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -83,6 +84,14 @@ return isUnscaledLdSt(MI.getOpcode()); } + /// Returns the unscaled load/store for the scaled load/store opcode, + /// if there is a corresponding unscaled variant available. + static Optional getUnscaledLdSt(unsigned Opc); + + + /// Returns the index for the immediate for a given instruction. + static unsigned getLoadStoreImmIdx(unsigned Opc); + /// Return true if pairing the given load or store may be paired with another. static bool isPairableLdStInst(const MachineInstr &MI); @@ -111,8 +120,8 @@ /// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly. /// /// For unscaled instructions, \p Scale is set to 1. - bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width, - int64_t &MinOffset, int64_t &MaxOffset) const; + static bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width, + int64_t &MinOffset, int64_t &MaxOffset); bool shouldClusterMemOps(MachineOperand &BaseOp1, MachineOperand &BaseOp2, unsigned NumLoads) const override; Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1714,6 +1714,64 @@ } } +Optional AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) { + switch (Opc) { + default: return {}; + case AArch64::PRFMui: return AArch64::PRFUMi; + case AArch64::LDRXui: return AArch64::LDURXi; + case AArch64::LDRWui: return AArch64::LDURWi; + case AArch64::LDRBui: return AArch64::LDURBi; + case AArch64::LDRHui: return AArch64::LDURHi; + case AArch64::LDRSui: return AArch64::LDURSi; + case AArch64::LDRDui: return AArch64::LDURDi; + case AArch64::LDRQui: return AArch64::LDURQi; + case AArch64::LDRBBui: return AArch64::LDURBBi; + case AArch64::LDRHHui: return AArch64::LDURHHi; + case AArch64::LDRSBXui: return AArch64::LDURSBXi; + case AArch64::LDRSBWui: return AArch64::LDURSBWi; + case AArch64::LDRSHXui: return AArch64::LDURSHXi; + case AArch64::LDRSHWui: return AArch64::LDURSHWi; + case AArch64::LDRSWui: return AArch64::LDURSWi; + case AArch64::STRXui: return AArch64::STURXi; + case AArch64::STRWui: return AArch64::STURWi; + case AArch64::STRBui: return AArch64::STURBi; + case AArch64::STRHui: return AArch64::STURHi; + case AArch64::STRSui: return AArch64::STURSi; + case AArch64::STRDui: return AArch64::STURDi; + case AArch64::STRQui: return AArch64::STURQi; + case AArch64::STRBBui: return AArch64::STURBBi; + case AArch64::STRHHui: return AArch64::STURHHi; + } +} + +unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { + switch (Opc) { + default: + return 2; + case AArch64::LDPXi: + case AArch64::LDPDi: + case AArch64::STPXi: + case AArch64::STPDi: + case AArch64::LDNPXi: + case AArch64::LDNPDi: + case AArch64::STNPXi: + case AArch64::STNPDi: + case AArch64::LDPQi: + case AArch64::STPQi: + case AArch64::LDNPQi: + case AArch64::STNPQi: + case AArch64::LDPWi: + case AArch64::LDPSi: + case AArch64::STPWi: + case AArch64::STPSi: + case AArch64::LDNPWi: + case AArch64::LDNPSi: + case AArch64::STNPWi: + case AArch64::STNPSi: + return 3; + } +} + bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) { switch (MI.getOpcode()) { default: @@ -1943,7 +2001,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width, int64_t &MinOffset, - int64_t &MaxOffset) const { + int64_t &MaxOffset) { switch (Opcode) { // Not a memory operation or something we want to handle. default: @@ -3148,11 +3206,6 @@ bool *OutUseUnscaledOp, unsigned *OutUnscaledOp, int *EmittableOffset) { - int Scale = 1; - bool IsSigned = false; - // The ImmIdx should be changed case by case if it is not 2. - unsigned ImmIdx = 2; - unsigned UnscaledOp = 0; // Set output values in case of early exit. if (EmittableOffset) *EmittableOffset = 0; @@ -3160,10 +3213,12 @@ *OutUseUnscaledOp = false; if (OutUnscaledOp) *OutUnscaledOp = 0; + + // Exit early for structured vector spills/fills as they can't take an + // immediate offset. switch (MI.getOpcode()) { default: - llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex"); - // Vector spills/fills can't take an immediate offset. + break; case AArch64::LD1Twov2d: case AArch64::LD1Threev2d: case AArch64::LD1Fourv2d: @@ -3177,207 +3232,50 @@ case AArch64::ST1Threev1d: case AArch64::ST1Fourv1d: return AArch64FrameOffsetCannotUpdate; - case AArch64::PRFMui: - Scale = 8; - UnscaledOp = AArch64::PRFUMi; - break; - case AArch64::LDRXui: - Scale = 8; - UnscaledOp = AArch64::LDURXi; - break; - case AArch64::LDRWui: - Scale = 4; - UnscaledOp = AArch64::LDURWi; - break; - case AArch64::LDRBui: - Scale = 1; - UnscaledOp = AArch64::LDURBi; - break; - case AArch64::LDRHui: - Scale = 2; - UnscaledOp = AArch64::LDURHi; - break; - case AArch64::LDRSui: - Scale = 4; - UnscaledOp = AArch64::LDURSi; - break; - case AArch64::LDRDui: - Scale = 8; - UnscaledOp = AArch64::LDURDi; - break; - case AArch64::LDRQui: - Scale = 16; - UnscaledOp = AArch64::LDURQi; - break; - case AArch64::LDRBBui: - Scale = 1; - UnscaledOp = AArch64::LDURBBi; - break; - case AArch64::LDRHHui: - Scale = 2; - UnscaledOp = AArch64::LDURHHi; - break; - case AArch64::LDRSBXui: - Scale = 1; - UnscaledOp = AArch64::LDURSBXi; - break; - case AArch64::LDRSBWui: - Scale = 1; - UnscaledOp = AArch64::LDURSBWi; - break; - case AArch64::LDRSHXui: - Scale = 2; - UnscaledOp = AArch64::LDURSHXi; - break; - case AArch64::LDRSHWui: - Scale = 2; - UnscaledOp = AArch64::LDURSHWi; - break; - case AArch64::LDRSWui: - Scale = 4; - UnscaledOp = AArch64::LDURSWi; - break; - - case AArch64::STRXui: - Scale = 8; - UnscaledOp = AArch64::STURXi; - break; - case AArch64::STRWui: - Scale = 4; - UnscaledOp = AArch64::STURWi; - break; - case AArch64::STRBui: - Scale = 1; - UnscaledOp = AArch64::STURBi; - break; - case AArch64::STRHui: - Scale = 2; - UnscaledOp = AArch64::STURHi; - break; - case AArch64::STRSui: - Scale = 4; - UnscaledOp = AArch64::STURSi; - break; - case AArch64::STRDui: - Scale = 8; - UnscaledOp = AArch64::STURDi; - break; - case AArch64::STRQui: - Scale = 16; - UnscaledOp = AArch64::STURQi; - break; - case AArch64::STRBBui: - Scale = 1; - UnscaledOp = AArch64::STURBBi; - break; - case AArch64::STRHHui: - Scale = 2; - UnscaledOp = AArch64::STURHHi; - break; - - case AArch64::LDPXi: - case AArch64::LDPDi: - case AArch64::STPXi: - case AArch64::STPDi: - case AArch64::LDNPXi: - case AArch64::LDNPDi: - case AArch64::STNPXi: - case AArch64::STNPDi: - ImmIdx = 3; - IsSigned = true; - Scale = 8; - break; - case AArch64::LDPQi: - case AArch64::STPQi: - case AArch64::LDNPQi: - case AArch64::STNPQi: - ImmIdx = 3; - IsSigned = true; - Scale = 16; - break; - case AArch64::LDPWi: - case AArch64::LDPSi: - case AArch64::STPWi: - case AArch64::STPSi: - case AArch64::LDNPWi: - case AArch64::LDNPSi: - case AArch64::STNPWi: - case AArch64::STNPSi: - ImmIdx = 3; - IsSigned = true; - Scale = 4; - break; - - case AArch64::LDURXi: - case AArch64::LDURWi: - case AArch64::LDURBi: - case AArch64::LDURHi: - case AArch64::LDURSi: - case AArch64::LDURDi: - case AArch64::LDURQi: - case AArch64::LDURHHi: - case AArch64::LDURBBi: - case AArch64::LDURSBXi: - case AArch64::LDURSBWi: - case AArch64::LDURSHXi: - case AArch64::LDURSHWi: - case AArch64::LDURSWi: - case AArch64::STURXi: - case AArch64::STURWi: - case AArch64::STURBi: - case AArch64::STURHi: - case AArch64::STURSi: - case AArch64::STURDi: - case AArch64::STURQi: - case AArch64::STURBBi: - case AArch64::STURHHi: - Scale = 1; - break; } - Offset += MI.getOperand(ImmIdx).getImm() * Scale; + // Get the min/max offset and the scale. + unsigned Scale, Width; + int64_t MinOff, MaxOff; + if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), Scale, Width, MinOff, + MaxOff)) + llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); + + // Construct the complete offset. + const MachineOperand &ImmOpnd = + MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode())); + Offset += ImmOpnd.getImm() * Scale; - bool useUnscaledOp = false; // If the offset doesn't match the scale, we rewrite the instruction to // use the unscaled instruction instead. Likewise, if we have a negative - // offset (and have an unscaled op to use). - if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0)) - useUnscaledOp = true; - - // Use an unscaled addressing mode if the instruction has a negative offset - // (or if the instruction is already using an unscaled addressing mode). - unsigned MaskBits; - if (IsSigned) { - // ldp/stp instructions. - MaskBits = 7; - Offset /= Scale; - } else if (UnscaledOp == 0 || useUnscaledOp) { - MaskBits = 9; - IsSigned = true; - Scale = 1; - } else { - MaskBits = 12; - IsSigned = false; - Offset /= Scale; + // offset and there is an unscaled op to use. + Optional UnscaledOp = + AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode()); + bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0); + if (useUnscaledOp && + !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, Scale, Width, MinOff, MaxOff)) + llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); + + int64_t Remainder = Offset % Scale; + assert(!(Remainder && useUnscaledOp) && + "Cannot have remainder when using unscaled op"); + + assert(MinOff < MaxOff && "Unexpected Min/Max offsets"); + int NewOffset = Offset / Scale; + if (MinOff <= NewOffset && NewOffset <= MaxOff) + Offset = Remainder; + else { + NewOffset = NewOffset < 0 ? MinOff : MaxOff; + Offset = Offset - NewOffset * Scale + Remainder; } - // Attempt to fold address computation. - int MaxOff = (1 << (MaskBits - IsSigned)) - 1; - int MinOff = (IsSigned ? (-MaxOff - 1) : 0); - if (Offset >= MinOff && Offset <= MaxOff) { - if (EmittableOffset) - *EmittableOffset = Offset; - Offset = 0; - } else { - int NewOff = Offset < 0 ? MinOff : MaxOff; - if (EmittableOffset) - *EmittableOffset = NewOff; - Offset = (Offset - NewOff) * Scale; - } + if (EmittableOffset) + *EmittableOffset = NewOffset; if (OutUseUnscaledOp) *OutUseUnscaledOp = useUnscaledOp; - if (OutUnscaledOp) - *OutUnscaledOp = UnscaledOp; + if (OutUnscaledOp && UnscaledOp) + *OutUnscaledOp = *UnscaledOp; + return AArch64FrameOffsetCanUpdate | (Offset == 0 ? AArch64FrameOffsetIsLegal : 0); }