Index: include/llvm/MC/LaneBitmask.h
===================================================================
--- include/llvm/MC/LaneBitmask.h
+++ include/llvm/MC/LaneBitmask.h
@@ -32,61 +32,149 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Printable.h"
 #include "llvm/Support/raw_ostream.h"
+#include <array>
 
 namespace llvm {
 
+  struct LaneBitmask;
+  static LLVM_ATTRIBUTE_UNUSED Printable PrintLaneMask(LaneBitmask);
+  static LLVM_ATTRIBUTE_UNUSED Printable PrintLaneMaskAsInitList(LaneBitmask);
+
   struct LaneBitmask {
     // When changing the underlying type, change the format string as well.
-    using Type = unsigned;
-    enum : unsigned { BitWidth = 8*sizeof(Type) };
+    static const unsigned N = 1;
+    using Type = std::array<unsigned, N>;
+    enum : unsigned { BitWidth = 8*N*sizeof(unsigned) };
     constexpr static const char *const FormatStr = "%08X";
 
     constexpr LaneBitmask() = default;
     explicit constexpr LaneBitmask(Type V) : Mask(V) {}
 
-    constexpr bool operator== (LaneBitmask M) const { return Mask == M.Mask; }
-    constexpr bool operator!= (LaneBitmask M) const { return Mask != M.Mask; }
-    constexpr bool operator< (LaneBitmask M) const { return Mask < M.Mask; }
-    constexpr bool none() const { return Mask == 0; }
-    constexpr bool any() const { return Mask != 0; }
-    constexpr bool all() const { return ~Mask == 0; }
+    bool operator== (LaneBitmask M) const { return Mask == M.Mask; }
+    bool operator!= (LaneBitmask M) const { return Mask != M.Mask; }
+    bool operator< (LaneBitmask M) const { return Mask < M.Mask; }
+    bool none() const {
+      for (unsigned I = 0; I != N; ++I)
+        if (Mask[I] != 0)
+          return false;
+      return true;
+    }
+    bool any() const {
+      for (unsigned I = 0; I != N; ++I)
+        if (Mask[I] != 0)
+          return true;
+      return false;
+    }
+    bool all() const {
+      for (unsigned I = 0; I != N; ++I)
+        if (Mask[I] != ~0u)
+          return false;
+      return true;
+    }
 
-    constexpr LaneBitmask operator~() const {
-      return LaneBitmask(~Mask);
+    LaneBitmask operator~() const {
+      Type T;
+      for (unsigned I = 0; I != N; ++I)
+        T[I] = ~Mask[I];
+      return LaneBitmask(T);
     }
-    constexpr LaneBitmask operator|(LaneBitmask M) const {
-      return LaneBitmask(Mask | M.Mask);
+    LaneBitmask operator|(LaneBitmask M) const {
+      Type T;
+      for (unsigned I = 0; I != N; ++I)
+        T[I] = Mask[I] | M.Mask[I];
+      return LaneBitmask(T);
     }
-    constexpr LaneBitmask operator&(LaneBitmask M) const {
-      return LaneBitmask(Mask & M.Mask);
+    LaneBitmask operator&(LaneBitmask M) const {
+      Type T;
+      for (unsigned I = 0; I != N; ++I)
+        T[I] = Mask[I] & M.Mask[I];
+      return LaneBitmask(T);
     }
     LaneBitmask &operator|=(LaneBitmask M) {
-      Mask |= M.Mask;
+      for (unsigned I = 0; I != N; ++I)
+        Mask[I] |= M.Mask[I];
       return *this;
     }
     LaneBitmask &operator&=(LaneBitmask M) {
-      Mask &= M.Mask;
+      for (unsigned I = 0; I != N; ++I)
+        Mask[I] &= M.Mask[I];
       return *this;
     }
 
-    constexpr Type getAsInteger() const { return Mask; }
+    // Rotate left by S: lane L moves to lane (L+S) % BitWidth. Word 0 holds
+    // lanes 0..31, so rotating left moves words towards higher indices.
+    LaneBitmask rol(unsigned S) const {
+      S %= BitWidth;
+      if (S == 0)
+        return *this;
+      Type T;
+
+      // Rotate whole words first: word I moves to word (I+W) % N.
+      unsigned W = S/32;
+      for (unsigned I = 0; I != N; ++I)
+        T[(I+W) % N] = Mask[I];
+
+      S = S % 32;
+      if (S != 0) {
+        // Rotate the remaining bits: each word shifts up and receives the
+        // high bits of the next lower word, wrapping around at word 0.
+        unsigned M0 = T[N-1];
+        for (unsigned I = N-1; I != 0; --I)
+          T[I] = (T[I] << S) | (T[I-1] >> (32-S));
+        T[0] = (T[0] << S) | (M0 >> (32-S));
+      }
+
+      return LaneBitmask(T);
+    }
 
-    static LaneBitmask getNone() { return LaneBitmask(0); }
-    static LaneBitmask getAll() { return ~LaneBitmask(0); }
+    unsigned getNumLanes() const {
+      unsigned S = 0;
+      for (unsigned M : Mask)
+        S += countPopulation(M);
+      return S;
+    }
+    unsigned getHighestLane() const {
+      for (unsigned I = N; I != 0; --I) {
+        if (Mask[I-1] == 0)
+          continue;
+        return Log2_32(Mask[I-1]) + 8*sizeof(unsigned)*(I-1);
+      }
+      return -1u;
+    }
+
+    static LaneBitmask getNone() { return LaneBitmask(); }
+    static LaneBitmask getAll() { return ~LaneBitmask(); }
     static LaneBitmask getLane(unsigned Lane) {
-      return LaneBitmask(Type(1) << Lane);
+      Type T = {{0}}; // Zero-initialize every word before setting one bit.
+      T[Lane / 32] = 1u << (Lane % 32);
+      return LaneBitmask(T);
    }
 
   private:
-    Type Mask = 0;
+    friend Printable PrintLaneMask(LaneBitmask LaneMask);
+    friend Printable PrintLaneMaskAsInitList(LaneBitmask LaneMask);
+    Type Mask = {{0}};
   };
 
-  /// Create Printable object to print LaneBitmasks on a \ref raw_ostream.
+  /// Create Printable objects to print LaneBitmasks on a \ref raw_ostream.
   static LLVM_ATTRIBUTE_UNUSED Printable PrintLaneMask(LaneBitmask LaneMask) {
     return Printable([LaneMask](raw_ostream &OS) {
-      OS << format(LaneBitmask::FormatStr, LaneMask.getAsInteger());
+      for (unsigned I = LaneBitmask::N; I != 0; --I)
+        OS << format(LaneBitmask::FormatStr, LaneMask.Mask[I-1]);
+    });
+  }
+
+  static LLVM_ATTRIBUTE_UNUSED Printable
+  PrintLaneMaskAsInitList(LaneBitmask LaneMask) {
+    return Printable([LaneMask](raw_ostream &OS) {
+      OS << "{{";
+      for (unsigned I = 0; I != LaneBitmask::N; ++I) {
+        if (I != 0)
+          OS << ',';
+        OS << "0x" << format(LaneBitmask::FormatStr, LaneMask.Mask[I]);
+      }
+      OS << "}}";
     });
   }
Index: lib/CodeGen/MIRParser/MIParser.cpp
===================================================================
--- lib/CodeGen/MIRParser/MIParser.cpp
+++ lib/CodeGen/MIRParser/MIParser.cpp
@@ -529,12 +529,10 @@
     if (Token.isNot(MIToken::IntegerLiteral) &&
         Token.isNot(MIToken::HexLiteral))
       return error("expected a lane mask");
-    static_assert(sizeof(LaneBitmask::Type) == sizeof(unsigned),
-                  "Use correct get-function for lane mask");
-    LaneBitmask::Type V;
+    unsigned V;
     if (getUnsigned(V))
       return error("invalid lane mask value");
-    Mask = LaneBitmask(V);
+    Mask = LaneBitmask({{V}});
     lex();
   }
   MBB.addLiveIn(Reg, Mask);
Index: lib/CodeGen/RegisterCoalescer.cpp
===================================================================
--- lib/CodeGen/RegisterCoalescer.cpp
+++ lib/CodeGen/RegisterCoalescer.cpp
@@ -2239,7 +2239,7 @@
   const MachineInstr *DefMI = nullptr;
   if (VNI->isPHIDef()) {
     // Conservatively assume that all lanes in a PHI are valid.
-    LaneBitmask Lanes = SubRangeJoin ? LaneBitmask(1)
+    LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
                                      : TRI->getSubRegIndexLaneMask(SubIdx);
     V.ValidLanes = V.WriteLanes = Lanes;
   } else {
@@ -2247,7 +2247,7 @@
     assert(DefMI != nullptr);
     if (SubRangeJoin) {
       // We don't care about the lanes when joining subregister ranges.
-      V.WriteLanes = V.ValidLanes = LaneBitmask(1);
+      V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
       if (DefMI->isImplicitDef()) {
         V.ValidLanes = LaneBitmask::getNone();
         V.ErasableImplicitDef = true;
Index: lib/CodeGen/SplitKit.cpp
===================================================================
--- lib/CodeGen/SplitKit.cpp
+++ lib/CodeGen/SplitKit.cpp
@@ -552,7 +552,7 @@
     if ((SubRegMask & ~LaneMask).any())
       continue;
 
-    unsigned PopCount = countPopulation(SubRegMask.getAsInteger());
+    unsigned PopCount = SubRegMask.getNumLanes();
     PossibleIndexes.push_back(Idx);
     if (PopCount > BestCover) {
       BestCover = PopCount;
@@ -583,8 +583,8 @@
 
       // Try to cover as much of the remaining lanes as possible but
       // as few of the already covered lanes as possible.
-      int Cover = countPopulation((SubRegMask & LanesLeft).getAsInteger())
-                - countPopulation((SubRegMask & ~LanesLeft).getAsInteger());
+      int Cover = (SubRegMask & LanesLeft).getNumLanes()
+                - (SubRegMask & ~LanesLeft).getNumLanes();
       if (Cover > BestCover) {
         BestCover = Cover;
         BestIdx = Idx;
Index: lib/Target/AMDGPU/GCNRegPressure.cpp
===================================================================
--- lib/Target/AMDGPU/GCNRegPressure.cpp
+++ lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -107,7 +107,7 @@
   assert(PrevMask < NewMask);
 
   Value[Kind == SGPR_TUPLE ? SGPR32 : VGPR32] +=
-    Sign * countPopulation((~PrevMask & NewMask).getAsInteger());
+    Sign * (~PrevMask & NewMask).getNumLanes();
 
   if (PrevMask.none()) {
     assert(NewMask.any());
@@ -201,7 +201,7 @@
     return MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg);
 
   auto MaxMask = MRI.getMaxLaneMaskForVReg(MO.getReg());
-  if (MaxMask.getAsInteger() == 1) // cannot have subregs
+  if (MaxMask == LaneBitmask::getLane(0)) // cannot have subregs
     return MaxMask;
 
   // For a tentative schedule LIS isn't updated yet but livemask should remain
Index: lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1275,8 +1275,7 @@
     return RC;
 
   // We can assume that each lane corresponds to one 32-bit register.
-  LaneBitmask::Type Mask = getSubRegIndexLaneMask(SubIdx).getAsInteger();
-  unsigned Count = countPopulation(Mask);
+  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
   if (isSGPRClass(RC)) {
     switch (Count) {
     case 1:
Index: utils/TableGen/CodeGenRegisters.cpp
===================================================================
--- utils/TableGen/CodeGenRegisters.cpp
+++ utils/TableGen/CodeGenRegisters.cpp
@@ -1295,9 +1295,7 @@
       // Moving from a class with no subregisters we just had a single lane:
      // The subregister must be a leaf subregister and only occupies 1 bit.
       // Move the bit from the class without subregisters into that position.
-      static_assert(sizeof(Idx.LaneMask.getAsInteger()) == 4,
-                    "Change Log2_32 to a proper one");
-      unsigned DstBit = Log2_32(Idx.LaneMask.getAsInteger());
+      unsigned DstBit = Idx.LaneMask.getHighestLane();
       assert(Idx.LaneMask == LaneBitmask::getLane(DstBit) &&
              "Must be a leaf subregister");
       MaskRolPair MaskRol = { LaneBitmask::getLane(0), (uint8_t)DstBit };
@@ -1328,9 +1326,7 @@
         assert(Composite->getComposites().empty());
 
         // Create Mask+Rotate operation and merge with existing ops if possible.
-        static_assert(sizeof(Composite->LaneMask.getAsInteger()) == 4,
-                      "Change Log2_32 to a proper one");
-        unsigned DstBit = Log2_32(Composite->LaneMask.getAsInteger());
+        unsigned DstBit = Composite->LaneMask.getHighestLane();
         int Shift = DstBit - SrcBit;
         uint8_t RotateLeft = Shift >= 0 ? (uint8_t)Shift
                                         : LaneBitmask::BitWidth + Shift;
Index: utils/TableGen/RegisterInfoEmitter.cpp
===================================================================
--- utils/TableGen/RegisterInfoEmitter.cpp
+++ utils/TableGen/RegisterInfoEmitter.cpp
@@ -611,7 +611,7 @@
 }
 
 static void printMask(raw_ostream &OS, LaneBitmask Val) {
-  OS << "LaneBitmask(0x" << PrintLaneMask(Val) << ')';
+  OS << "LaneBitmask{" << PrintLaneMaskAsInitList(Val) << '}';
 }
 
 // Try to combine Idx's compose map into Vec if it is compatible.
@@ -775,13 +775,8 @@
        "  --IdxA; assert(IdxA < " << SubRegIndices.size()
     << " && \"Subregister index out of bounds\");\n"
        "  LaneBitmask Result;\n"
-       "  for (const MaskRolOp *Ops = CompositeSequences[IdxA]; Ops->Mask.any(); ++Ops) {\n"
-       "    LaneBitmask::Type M = LaneMask.getAsInteger() & Ops->Mask.getAsInteger();\n"
-       "    if (unsigned S = Ops->RotateLeft)\n"
-       "      Result |= LaneBitmask((M << S) | (M >> (LaneBitmask::BitWidth - S)));\n"
-       "    else\n"
-       "      Result |= LaneBitmask(M);\n"
-       "  }\n"
+       "  for (const MaskRolOp *Ops = CompositeSequences[IdxA]; Ops->Mask.any(); ++Ops)\n"
+       "    Result |= (LaneMask & Ops->Mask).rol(Ops->RotateLeft);\n"
        "  return Result;\n"
        "}\n\n";
@@ -792,13 +787,8 @@
        "  --IdxA; assert(IdxA < " << SubRegIndices.size()
     << " && \"Subregister index out of bounds\");\n"
        "  LaneBitmask Result;\n"
-       "  for (const MaskRolOp *Ops = CompositeSequences[IdxA]; Ops->Mask.any(); ++Ops) {\n"
-       "    LaneBitmask::Type M = LaneMask.getAsInteger();\n"
-       "    if (unsigned S = Ops->RotateLeft)\n"
-       "      Result |= LaneBitmask((M >> S) | (M << (LaneBitmask::BitWidth - S)));\n"
-       "    else\n"
-       "      Result |= LaneBitmask(M);\n"
-       "  }\n"
+       "  for (const MaskRolOp *Ops = CompositeSequences[IdxA]; Ops->Mask.any(); ++Ops)\n"
+       "    Result |= LaneMask.rol(LaneBitmask::BitWidth - Ops->RotateLeft);\n"
        "  return Result;\n"
        "}\n\n";
 }
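
For illustration only (not part of the patch): a minimal sketch of how the
widened LaneBitmask API is meant to be used once the header change above is
applied. The function laneBitmaskSketch and the concrete lane numbers are
invented for this example; the assertions simply restate the behaviour the
new methods are expected to have with the default N = 1.

#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace llvm;

static void laneBitmaskSketch() {
  // Lane numbering is unchanged: lane L lives in word L/32, bit L%32.
  LaneBitmask M = LaneBitmask::getLane(0) | LaneBitmask::getLane(5);

  assert(M.any() && !M.all() && !M.none());
  assert(M.getNumLanes() == 2);    // replaces countPopulation(getAsInteger())
  assert(M.getHighestLane() == 5); // replaces Log2_32(getAsInteger())

  // rol() replaces the open-coded rotates in the generated
  // composeSubRegIndexLaneMaskImpl(): lane L moves to lane (L+S) % BitWidth.
  assert(M.rol(3) == (LaneBitmask::getLane(3) | LaneBitmask::getLane(8)));
  assert(M.rol(LaneBitmask::BitWidth) == M); // rotating by BitWidth is a no-op

  // PrintLaneMask keeps the old hex form; PrintLaneMaskAsInitList prints the
  // initializer-list form used by TableGen's printMask, e.g. {{0x00000021}}.
  errs() << PrintLaneMask(M) << ' ' << PrintLaneMaskAsInitList(M) << '\n';
}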