Index: llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp =================================================================== --- llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -1294,9 +1294,28 @@ SMLoc IDLoc) { MCContext &Context = getParser().getContext(); const MCRegisterInfo *RI = getContext().getRegisterInfo(); - std::string r = "r"; - std::string v = "v"; - std::string Colon = ":"; + const std::string r = "r"; + const std::string v = "v"; + const std::string Colon = ":"; + using RegPairVals = std::pair; + auto GetRegPair = [this, r](RegPairVals RegPair) { + const std::string R1 = r + utostr(RegPair.first); + const std::string R2 = r + utostr(RegPair.second); + + return std::make_pair(matchRegister(R1), matchRegister(R2)); + }; + auto GetScalarRegs = [RI, GetRegPair](unsigned RegPair) { + const unsigned Lower = RI->getEncodingValue(RegPair); + const RegPairVals RegPair_ = std::make_pair(Lower + 1, Lower); + + return GetRegPair(RegPair_); + }; + auto GetVecRegs = [GetRegPair](unsigned VecRegPair) { + const RegPairVals RegPair = + HexagonMCInstrInfo::GetVecRegPairIndices(VecRegPair); + + return GetRegPair(RegPair); + }; bool is32bit = false; // used to distinguish between CONST32 and CONST64 switch (Inst.getOpcode()) { @@ -1388,14 +1407,9 @@ // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)" case Hexagon::A2_tfrp: { MCOperand &MO = Inst.getOperand(1); - unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); - std::string R1 = r + utostr(RegPairNum + 1); - StringRef Reg1(R1); - MO.setReg(matchRegister(Reg1)); - // Add a new operand for the second register in the pair. 
- std::string R2 = r + utostr(RegPairNum); - StringRef Reg2(R2); - Inst.addOperand(MCOperand::createReg(matchRegister(Reg2))); + const std::pair RegPair = GetScalarRegs(MO.getReg()); + MO.setReg(RegPair.first); + Inst.addOperand(MCOperand::createReg(RegPair.second)); Inst.setOpcode(Hexagon::A2_combinew); break; } @@ -1403,14 +1417,9 @@ case Hexagon::A2_tfrpt: case Hexagon::A2_tfrpf: { MCOperand &MO = Inst.getOperand(2); - unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); - std::string R1 = r + utostr(RegPairNum + 1); - StringRef Reg1(R1); - MO.setReg(matchRegister(Reg1)); - // Add a new operand for the second register in the pair. - std::string R2 = r + utostr(RegPairNum); - StringRef Reg2(R2); - Inst.addOperand(MCOperand::createReg(matchRegister(Reg2))); + const std::pair RegPair = GetScalarRegs(MO.getReg()); + MO.setReg(RegPair.first); + Inst.addOperand(MCOperand::createReg(RegPair.second)); Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt) ? Hexagon::C2_ccombinewt : Hexagon::C2_ccombinewf); @@ -1419,14 +1428,9 @@ case Hexagon::A2_tfrptnew: case Hexagon::A2_tfrpfnew: { MCOperand &MO = Inst.getOperand(2); - unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); - std::string R1 = r + utostr(RegPairNum + 1); - StringRef Reg1(R1); - MO.setReg(matchRegister(Reg1)); - // Add a new operand for the second register in the pair. - std::string R2 = r + utostr(RegPairNum); - StringRef Reg2(R2); - Inst.addOperand(MCOperand::createReg(matchRegister(Reg2))); + const std::pair RegPair = GetScalarRegs(MO.getReg()); + MO.setReg(RegPair.first); + Inst.addOperand(MCOperand::createReg(RegPair.second)); Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew) ? 
Hexagon::C2_ccombinewnewt : Hexagon::C2_ccombinewnewf); @@ -1436,12 +1440,9 @@ // Translate a "$Vdd = $Vss" to "$Vdd = vcombine($Vs, $Vt)" case Hexagon::V6_vassignp: { MCOperand &MO = Inst.getOperand(1); - unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); - std::string R1 = v + utostr(RegPairNum + 1); - MO.setReg(MatchRegisterName(R1)); - // Add a new operand for the second register in the pair. - std::string R2 = v + utostr(RegPairNum); - Inst.addOperand(MCOperand::createReg(MatchRegisterName(R2))); + const std::pair RegPair = GetVecRegs(MO.getReg()); + MO.setReg(RegPair.first); + Inst.addOperand(MCOperand::createReg(RegPair.second)); Inst.setOpcode(Hexagon::V6_vcombine); break; } Index: llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp =================================================================== --- llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -498,9 +498,13 @@ } else if (HexagonMCInstrInfo::hasNewValue(*MCII, Inst)) { unsigned Producer = HexagonMCInstrInfo::getNewValueOperand(*MCII, Inst).getReg(); - if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15) - Producer = ((Producer - Hexagon::W0) << 1) + SubregBit + Hexagon::V0; - else if (SubregBit) + + if (HexagonMCInstrInfo::IsVecRegPair(Producer)) { + const bool Rev = HexagonMCInstrInfo::IsReverseVecRegPair(Producer); + const unsigned ProdPairIndex = + Rev ? Producer - Hexagon::WR0 : Producer - Hexagon::W0; + Producer = (ProdPairIndex << 1) + SubregBit + Hexagon::V0; + } else if (SubregBit) // Hexagon PRM 10.11 New-value operands // Nt[0] is reserved and should always be encoded as zero. 
return MCDisassembler::Fail; @@ -606,12 +610,16 @@ uint64_t /*Address*/, const void *Decoder) { static const MCPhysReg HvxWRDecoderTable[] = { - Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3, - Hexagon::W4, Hexagon::W5, Hexagon::W6, Hexagon::W7, - Hexagon::W8, Hexagon::W9, Hexagon::W10, Hexagon::W11, - Hexagon::W12, Hexagon::W13, Hexagon::W14, Hexagon::W15}; + Hexagon::W0, Hexagon::WR0, Hexagon::W1, Hexagon::WR1, Hexagon::W2, + Hexagon::WR2, Hexagon::W3, Hexagon::WR3, Hexagon::W4, Hexagon::WR4, + Hexagon::W5, Hexagon::WR5, Hexagon::W6, Hexagon::WR6, Hexagon::W7, + Hexagon::WR7, Hexagon::W8, Hexagon::WR8, Hexagon::W9, Hexagon::WR9, + Hexagon::W10, Hexagon::WR10, Hexagon::W11, Hexagon::WR11, Hexagon::W12, + Hexagon::WR12, Hexagon::W13, Hexagon::WR13, Hexagon::W14, Hexagon::WR14, + Hexagon::W15, Hexagon::WR15, + }; - return (DecodeRegisterClass(Inst, RegNo >> 1, HvxWRDecoderTable)); + return DecodeRegisterClass(Inst, RegNo, HvxWRDecoderTable); } LLVM_ATTRIBUTE_UNUSED // Suppress warning temporarily. Index: llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp =================================================================== --- llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -172,6 +172,13 @@ Reserved.set(Hexagon::C8); Reserved.set(Hexagon::USR_OVF); + // Leveraging these registers will require more work to recognize + // the new semantics posed, Hi/LoVec patterns, etc. + // Note well: if enabled, they should be restricted to only + // where `HST.useHVXOps() && HST.hasV67Ops()` is true. 
+ for (auto Reg : Hexagon_MC::GetVectRegRev()) + Reserved.set(Reg); + if (MF.getSubtarget().hasReservedR19()) Reserved.set(Hexagon::R19); Index: llvm/lib/Target/Hexagon/HexagonRegisterInfo.td =================================================================== --- llvm/lib/Target/Hexagon/HexagonRegisterInfo.td +++ llvm/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -18,6 +18,12 @@ let HWEncoding{4-0} = num; } + // These registers are used to preserve a distinction between + // vector register pairs of differing order. + class HexagonFakeReg : Register { + let isArtificial = 1; + } + class HexagonDoubleReg num, string n, list subregs, list alt = []> : RegisterWithSubRegs { @@ -30,6 +36,13 @@ class Ri num, string n, list alt = []> : HexagonReg; + // Rp - false/pseudo registers. These registers are used + // to provide a distinct set of aliases for both styles of vector + // register pairs without encountering subregister indexing constraints. + class R_fake : + HexagonFakeReg; + + // Rf - 32-bit floating-point registers. class Rf num, string n> : HexagonReg; @@ -81,6 +94,7 @@ def isub_hi : SubRegIndex<32, 32>; def vsub_lo : SubRegIndex<512>; def vsub_hi : SubRegIndex<512, 512>; + def vsub_fake: SubRegIndex<512>; def wsub_lo : SubRegIndex<1024>; def wsub_hi : SubRegIndex<1024, 1024>; def subreg_overflow : SubRegIndex<1, 0>; @@ -183,27 +197,49 @@ foreach i = 0-31 in { def V#i : Ri, DwarfRegNum<[!add(i, 99)]>; + def VF#i : R_fake<"__"#!add(i,999999)>, DwarfRegNum<[!add(i, 999999)]>; + def VFR#i : R_fake<"__"#!add(i,9999999)>, DwarfRegNum<[!add(i, 9999999)]>; } def VTMP : Ri<0, "vtmp">, DwarfRegNum<[131]>; // Aliases of the V* registers used to hold double vec values. 
- let SubRegIndices = [vsub_lo, vsub_hi], CoveredBySubRegs = 1 in { - def W0 : Rd< 0, "v1:0", [V0, V1]>, DwarfRegNum<[99]>; - def W1 : Rd< 2, "v3:2", [V2, V3]>, DwarfRegNum<[101]>; - def W2 : Rd< 4, "v5:4", [V4, V5]>, DwarfRegNum<[103]>; - def W3 : Rd< 6, "v7:6", [V6, V7]>, DwarfRegNum<[105]>; - def W4 : Rd< 8, "v9:8", [V8, V9]>, DwarfRegNum<[107]>; - def W5 : Rd<10, "v11:10", [V10, V11]>, DwarfRegNum<[109]>; - def W6 : Rd<12, "v13:12", [V12, V13]>, DwarfRegNum<[111]>; - def W7 : Rd<14, "v15:14", [V14, V15]>, DwarfRegNum<[113]>; - def W8 : Rd<16, "v17:16", [V16, V17]>, DwarfRegNum<[115]>; - def W9 : Rd<18, "v19:18", [V18, V19]>, DwarfRegNum<[117]>; - def W10 : Rd<20, "v21:20", [V20, V21]>, DwarfRegNum<[119]>; - def W11 : Rd<22, "v23:22", [V22, V23]>, DwarfRegNum<[121]>; - def W12 : Rd<24, "v25:24", [V24, V25]>, DwarfRegNum<[123]>; - def W13 : Rd<26, "v27:26", [V26, V27]>, DwarfRegNum<[125]>; - def W14 : Rd<28, "v29:28", [V28, V29]>, DwarfRegNum<[127]>; - def W15 : Rd<30, "v31:30", [V30, V31]>, DwarfRegNum<[129]>; + let SubRegIndices = [vsub_lo, vsub_hi, vsub_fake], CoveredBySubRegs = 1 in { + def W0 : Rd< 0, "v1:0", [V0, V1, VF0]>, DwarfRegNum<[99]>; + def W1 : Rd< 2, "v3:2", [V2, V3, VF1]>, DwarfRegNum<[101]>; + def W2 : Rd< 4, "v5:4", [V4, V5, VF2]>, DwarfRegNum<[103]>; + def W3 : Rd< 6, "v7:6", [V6, V7, VF3]>, DwarfRegNum<[105]>; + def W4 : Rd< 8, "v9:8", [V8, V9, VF4]>, DwarfRegNum<[107]>; + def W5 : Rd<10, "v11:10", [V10, V11, VF5]>, DwarfRegNum<[109]>; + def W6 : Rd<12, "v13:12", [V12, V13, VF6]>, DwarfRegNum<[111]>; + def W7 : Rd<14, "v15:14", [V14, V15, VF7]>, DwarfRegNum<[113]>; + def W8 : Rd<16, "v17:16", [V16, V17, VF8]>, DwarfRegNum<[115]>; + def W9 : Rd<18, "v19:18", [V18, V19, VF9]>, DwarfRegNum<[117]>; + def W10 : Rd<20, "v21:20", [V20, V21, VF10]>, DwarfRegNum<[119]>; + def W11 : Rd<22, "v23:22", [V22, V23, VF11]>, DwarfRegNum<[121]>; + def W12 : Rd<24, "v25:24", [V24, V25, VF12]>, DwarfRegNum<[123]>; + def W13 : Rd<26, "v27:26", [V26, V27, VF13]>, 
DwarfRegNum<[125]>; + def W14 : Rd<28, "v29:28", [V28, V29, VF14]>, DwarfRegNum<[127]>; + def W15 : Rd<30, "v31:30", [V30, V31, VF15]>, DwarfRegNum<[129]>; + } + + // Reverse Aliases of the V* registers used to hold double vec values. + let SubRegIndices = [vsub_lo, vsub_hi, vsub_fake], CoveredBySubRegs = 1 in { + def WR0 : Rd< 1, "v0:1", [V0, V1, VFR0]>, DwarfRegNum<[161]>; + def WR1 : Rd< 3, "v2:3", [V2, V3, VFR1]>, DwarfRegNum<[162]>; + def WR2 : Rd< 5, "v4:5", [V4, V5, VFR2]>, DwarfRegNum<[163]>; + def WR3 : Rd< 7, "v6:7", [V6, V7, VFR3]>, DwarfRegNum<[164]>; + def WR4 : Rd< 9, "v8:9", [V8, V9, VFR4]>, DwarfRegNum<[165]>; + def WR5 : Rd<11, "v10:11", [V10, V11, VFR5]>, DwarfRegNum<[166]>; + def WR6 : Rd<13, "v12:13", [V12, V13, VFR6]>, DwarfRegNum<[167]>; + def WR7 : Rd<15, "v14:15", [V14, V15, VFR7]>, DwarfRegNum<[168]>; + def WR8 : Rd<17, "v16:17", [V16, V17, VFR8]>, DwarfRegNum<[169]>; + def WR9 : Rd<19, "v18:19", [V18, V19, VFR9]>, DwarfRegNum<[170]>; + def WR10: Rd<21, "v20:21", [V20, V21, VFR10]>, DwarfRegNum<[171]>; + def WR11: Rd<23, "v22:23", [V22, V23, VFR11]>, DwarfRegNum<[172]>; + def WR12: Rd<25, "v24:25", [V24, V25, VFR12]>, DwarfRegNum<[173]>; + def WR13: Rd<27, "v26:27", [V26, V27, VFR13]>, DwarfRegNum<[174]>; + def WR14: Rd<29, "v28:29", [V28, V29, VFR14]>, DwarfRegNum<[175]>; + def WR15: Rd<31, "v30:31", [V30, V31, VFR15]>, DwarfRegNum<[176]>; } // Aliases of the V* registers used to hold quad vec values. 
@@ -314,7 +350,7 @@ } def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32], 1024, - (add (sequence "W%u", 0, 15))> { + (add (sequence "W%u", 0, 15), (sequence "WR%u", 0, 15))> { let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], [RegInfo<1024,1024,1024>, RegInfo<2048,2048,2048>, RegInfo<1024,1024,1024>]>; } @@ -365,6 +401,10 @@ FRAMELIMIT, FRAMEKEY, PKTCOUNTLO, PKTCOUNTHI, UTIMERLO, UTIMERHI, M0, M1, USR)>; +let Size = 64 in +def VectRegRev : RegisterClass<"Hexagon", [i64], 64, + (add (sequence "WR%u", 0, 15))>; + let isAllocatable = 0 in def UsrBits : RegisterClass<"Hexagon", [i1], 0, (add USR_OVF)>; Index: llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp =================================================================== --- llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp +++ llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp @@ -71,9 +71,10 @@ char HexagonVectorPrint::ID = 0; static bool isVecReg(unsigned Reg) { - return (Reg >= Hexagon::V0 && Reg <= Hexagon::V31) - || (Reg >= Hexagon::W0 && Reg <= Hexagon::W15) - || (Reg >= Hexagon::Q0 && Reg <= Hexagon::Q3); + return (Reg >= Hexagon::V0 && Reg <= Hexagon::V31) || + (Reg >= Hexagon::W0 && Reg <= Hexagon::W15) || + (Reg >= Hexagon::WR0 && Reg <= Hexagon::WR15) || + (Reg >= Hexagon::Q0 && Reg <= Hexagon::Q3); } static std::string getStringReg(unsigned R) { Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h =================================================================== --- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h +++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h @@ -72,6 +72,10 @@ using ReadOnlyIterator = std::set::iterator; std::set ReadOnly; + // Contains the vector-pair-registers with the even number + // first ("v0:1", e.g.) used/def'd in this packet. 
+ std::set ReversePairs; + void init(); void init(MCInst const &); void initReg(MCInst const &, unsigned, unsigned &PredReg, bool &isTrue); @@ -94,6 +98,7 @@ bool checkAXOK(); bool checkHWLoop(); bool checkCOFMax1(); + bool checkLegalVecRegPair(); static void compoundRegisterMap(unsigned &); Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp =================================================================== --- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -81,6 +81,9 @@ if (!MCSubRegIterator(*SRI, &RI).isValid()) // Skip super-registers used indirectly. Uses.insert(*SRI); + + if (HexagonMCInstrInfo::IsReverseVecRegPair(R)) + ReversePairs.insert(R); } void HexagonMCChecker::init(MCInst const &MCI) { @@ -133,6 +136,9 @@ if (R == Hexagon::C8) R = Hexagon::USR; + if (HexagonMCInstrInfo::IsReverseVecRegPair(R)) + ReversePairs.insert(R); + // Note register definitions, direct ones as well as indirect side-effects. // Super-registers are not tracked directly, but their components. for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); @@ -192,7 +198,7 @@ MCSubtargetInfo const &STI, MCInst &mcb, MCRegisterInfo const &ri, bool ReportErrors) : Context(Context), MCB(mcb), RI(ri), MCII(MCII), STI(STI), - ReportErrors(ReportErrors) { + ReportErrors(ReportErrors), ReversePairs() { init(); } @@ -200,7 +206,10 @@ MCSubtargetInfo const &STI, bool CopyReportErrors) : Context(Other.Context), MCB(Other.MCB), RI(Other.RI), MCII(Other.MCII), - STI(STI), ReportErrors(CopyReportErrors ? Other.ReportErrors : false) {} + STI(STI), ReportErrors(CopyReportErrors ? 
Other.ReportErrors : false), + ReversePairs() { + init(); +} bool HexagonMCChecker::check(bool FullCheck) { bool chkP = checkPredicates(); @@ -218,8 +227,9 @@ bool chkAXOK = checkAXOK(); bool chkCofMax1 = checkCOFMax1(); bool chkHWLoop = checkHWLoop(); + bool chkLegalVecRegPair = checkLegalVecRegPair(); bool chk = chkP && chkNV && chkR && chkRRO && chkS && chkSh && chkSl && - chkAXOK && chkCofMax1 && chkHWLoop; + chkAXOK && chkCofMax1 && chkHWLoop && chkLegalVecRegPair; return chk; } @@ -729,3 +739,16 @@ if (ReportErrors) Context.reportWarning(MCB.getLoc(), Msg); } + +bool HexagonMCChecker::checkLegalVecRegPair() { + const bool IsPermitted = STI.getFeatureBits()[Hexagon::ArchV67]; + const bool HasReversePairs = ReversePairs.size() != 0; + + if (!IsPermitted && HasReversePairs) { + for (auto R : ReversePairs) + reportError("register pair `" + Twine(RI.getName(R)) + + "' is not permitted for this architecture"); + return false; + } + return true; +} Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp =================================================================== --- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -391,15 +391,9 @@ static bool RegisterMatches(unsigned Consumer, unsigned Producer, unsigned Producer2) { - if (Consumer == Producer) - return true; - if (Consumer == Producer2) - return true; - // Calculate if we're a single vector consumer referencing a double producer - if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15) - if (Consumer >= Hexagon::V0 && Consumer <= Hexagon::V31) - return ((Consumer - Hexagon::V0) >> 1) == (Producer - Hexagon::W0); - return false; + return (Consumer == Producer) || (Consumer == Producer2) || + HexagonMCInstrInfo::IsSingleConsumerRefPairProducer(Producer, + Consumer); } /// EncodeSingleInstruction - Emit a single @@ -735,7 +729,8 @@ unsigned SOffset = 0; unsigned VOffset = 0; unsigned UseReg = MO.getReg(); - 
unsigned DefReg1, DefReg2; + unsigned DefReg1 = Hexagon::NoRegister; + unsigned DefReg2 = Hexagon::NoRegister; auto Instrs = HexagonMCInstrInfo::bundleInstructions(*State.Bundle); const MCOperand *I = Instrs.begin() + State.Index - 1; @@ -746,7 +741,8 @@ if (HexagonMCInstrInfo::isImmext(Inst)) continue; - DefReg1 = DefReg2 = 0; + DefReg1 = Hexagon::NoRegister; + DefReg2 = Hexagon::NoRegister; ++SOffset; if (HexagonMCInstrInfo::isVector(MCII, Inst)) { // Vector instructions don't count scalars. Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h =================================================================== --- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h +++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h @@ -351,6 +351,16 @@ unsigned SubregisterBit(unsigned Consumer, unsigned Producer, unsigned Producer2); +bool IsVecRegSingle(unsigned VecReg); +bool IsVecRegPair(unsigned VecReg); +bool IsReverseVecRegPair(unsigned VecReg); +bool IsSingleConsumerRefPairProducer(unsigned Producer, unsigned Consumer); + +/// Returns an ordered pair of the constituent register ordinals for +/// each of the elements of \a VecRegPair. For example, Hexagon::WR0 ("v0:1") +/// returns { 0, 1 } and Hexagon::W1 ("v3:2") returns { 3, 2 }. 
+std::pair GetVecRegPairIndices(unsigned VecRegPair); + // Attempt to find and replace compound pairs void tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCContext &Context, MCInst &MCI); Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp =================================================================== --- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -676,6 +676,45 @@ return (Flags & outerLoopMask) != 0; } +bool HexagonMCInstrInfo::IsVecRegPair(unsigned VecReg) { + return (VecReg >= Hexagon::W0 && VecReg <= Hexagon::W15) || + (VecReg >= Hexagon::WR0 && VecReg <= Hexagon::WR15); +} + +bool HexagonMCInstrInfo::IsReverseVecRegPair(unsigned VecReg) { + return (VecReg >= Hexagon::WR0 && VecReg <= Hexagon::WR15); +} + +bool HexagonMCInstrInfo::IsVecRegSingle(unsigned VecReg) { + return (VecReg >= Hexagon::V0 && VecReg <= Hexagon::V31); +} + +std::pair +HexagonMCInstrInfo::GetVecRegPairIndices(unsigned VecRegPair) { + assert(IsVecRegPair(VecRegPair) && + "VecRegPair must be a vector register pair"); + + const bool IsRev = IsReverseVecRegPair(VecRegPair); + const unsigned PairIndex = + 2 * (IsRev ? VecRegPair - Hexagon::WR0 : VecRegPair - Hexagon::W0); + + return IsRev ? std::make_pair(PairIndex, PairIndex + 1) + : std::make_pair(PairIndex + 1, PairIndex); +} + +bool HexagonMCInstrInfo::IsSingleConsumerRefPairProducer(unsigned Producer, + unsigned Consumer) { + if (IsVecRegPair(Producer) && IsVecRegSingle(Consumer)) { + const unsigned ProdPairIndex = IsReverseVecRegPair(Producer) + ? 
Producer - Hexagon::WR0 + : Producer - Hexagon::W0; + const unsigned ConsumerSingleIndex = (Consumer - Hexagon::V0) >> 1; + + return ConsumerSingleIndex == ProdPairIndex; + } + return false; +} + bool HexagonMCInstrInfo::isPredicated(MCInstrInfo const &MCII, MCInst const &MCI) { const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; @@ -971,9 +1010,8 @@ unsigned Producer2) { // If we're a single vector consumer of a double producer, set subreg bit // based on if we're accessing the lower or upper register component - if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15) - if (Consumer >= Hexagon::V0 && Consumer <= Hexagon::V31) - return (Consumer - Hexagon::V0) & 0x1; + if (IsVecRegPair(Producer) && IsVecRegSingle(Consumer)) + return (Consumer - Hexagon::V0) & 0x1; if (Producer2 != Hexagon::NoRegister) return Consumer == Producer; return 0; Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h =================================================================== --- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H #define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include #include @@ -82,6 +83,8 @@ void addArchSubtarget(MCSubtargetInfo const *STI, StringRef FS); unsigned GetELFFlags(const MCSubtargetInfo &STI); + + llvm::ArrayRef GetVectRegRev(); } MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII, Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp =================================================================== --- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -532,6 +532,8 @@ return F->second; } +llvm::ArrayRef Hexagon_MC::GetVectRegRev() { return VectRegRev; } + namespace { 
class HexagonMCInstrAnalysis : public MCInstrAnalysis { public: Index: llvm/test/CodeGen/Hexagon/swp-sigma.ll =================================================================== --- llvm/test/CodeGen/Hexagon/swp-sigma.ll +++ llvm/test/CodeGen/Hexagon/swp-sigma.ll @@ -2,28 +2,11 @@ ; We do not pipeline sigma yet, but the non-pipelined version ; with good scheduling is pretty fast. The compiler generates -; 19 packets, and the assembly version is 16. +; 18 packets, and the assembly version is 16. ; CHECK: loop0(.LBB0_[[LOOP:.]], ; CHECK: .LBB0_[[LOOP]]: -; CHECK: } -; CHECK: } -; CHECK: } -; CHECK: } -; CHECK: } -; CHECK: } -; CHECK: } -; CHECK: } -; CHECK: } -; CHECK: } -; CHECK: } -; CHECK: } -; CHECK: } -; CHECK: } -; CHECK: } -; CHECK: } -; CHECK: } -; CHECK: } +; CHECK-COUNT-17: } ; CHECK: }{{[ \t]*}}:endloop @g0 = external constant [10 x i16], align 128 Index: llvm/test/CodeGen/Hexagon/vect-regpairs.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Hexagon/vect-regpairs.ll @@ -0,0 +1,135 @@ +;RUN: llc -march=hexagon -mcpu=hexagonv66 -mhvx -filetype=obj < %s -o - | llvm-objdump -mv66 -mhvx -d - | FileCheck --check-prefix=CHECK-V66 %s +;RUN: llc -march=hexagon -mcpu=hexagonv67 -mhvx -filetype=obj < %s -o - | llvm-objdump -mv67 -mhvx -d - | FileCheck --check-prefix=CHECK-V67 %s + +; Should not attempt to use v<even>:<odd> 'reverse' vector regpairs +; on old or new arches (should not crash). 
+ +; CHECK-V66: vcombine +; CHECK-V67: vcombine +declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) +declare <16 x i32> @llvm.hexagon.V6.vd0() +declare <32 x i32> @llvm.hexagon.V6.vmpybus(<16 x i32>, i32) +declare <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32>, <32 x i32>, i32) +declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) +declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32 ) +declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) +declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32 ) +declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>) +declare <16 x i32> @llvm.hexagon.V6.vmpyihb.acc(<16 x i32>, <16 x i32>, i32) +declare <16 x i32> @llvm.hexagon.V6.vasrhubrndsat(<16 x i32>, <16 x i32>, i32) + +declare <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32>, <16 x i32>) +declare <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32>, <16 x i32>, i32) +declare <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32>, <16 x i32>, i32) +declare <16 x i32> @llvm.hexagon.V6.vshuffob(<16 x i32>, <16 x i32>) + + +define void @Gaussian7x7u8PerRow(i8* %src, i32 %stride, i32 %width, i8* %dst) #0 { +entry: + %mul = mul i32 %stride, 3 + %idx.neg = sub i32 0, %mul + %add.ptr = getelementptr i8, i8* %src, i32 %idx.neg + bitcast i8* %add.ptr to <16 x i32>* + %mul1 = shl i32 %stride, 1 + %idx.neg2 = sub i32 0, %mul1 + %add.ptr3 = getelementptr i8, i8* %src, i32 %idx.neg2 + bitcast i8* %add.ptr3 to <16 x i32>* + %idx.neg5 = sub i32 0, %stride + %add.ptr6 = getelementptr i8, i8* %src, i32 %idx.neg5 + bitcast i8* %add.ptr6 to <16 x i32>* + bitcast i8* %src to <16 x i32>* + %add.ptr10 = getelementptr i8, i8* %src, i32 %stride + bitcast i8* %add.ptr10 to <16 x i32>* + %add.ptr12 = getelementptr i8, i8* %src, i32 %mul1 + bitcast i8* %add.ptr12 to <16 x i32>* + %add.ptr14 = getelementptr i8, i8* %src, i32 %mul + bitcast i8* %add.ptr14 to <16 x i32>* + bitcast i8* %dst to <16 x i32>* + load <16 x i32>, <16 x i32>* %0load <16 x 
i32>, <16 x i32>* %1load <16 x i32>, <16 x i32>* %2load <16 x i32>, <16 x i32>* %3load <16 x i32>, <16 x i32>* %4load <16 x i32>, <16 x i32>* %5load <16 x i32>, <16 x i32>* %6call <16 x i32> @llvm.hexagon.V6.vd0() + call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %15, <16 x i32> %15) + call <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32> %14, <16 x i32> %8) + call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %13, <16 x i32> %9) + call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %17, <32 x i32> %18, i32 101058054) + call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %12, <16 x i32> %10) + call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %19, <32 x i32> %20, i32 252645135) + call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %21, <16 x i32> %11, i32 336860180) + %cmp155 = icmp sgt i32 %width, 64 + br i1 %cmp155, label %for.body.preheader, label %for.end +for.body.preheader: %incdec.ptr20 = getelementptr i8, i8* %add.ptr14%23 = bitcast i8* %incdec.ptr20 to <16 x i32>* + %incdec.ptr19 = getelementptr i8, i8* %add.ptr12%24 = bitcast i8* %incdec.ptr19 to <16 x i32>* + %incdec.ptr18 = getelementptr i8, i8* %add.ptr10%25 = bitcast i8* %incdec.ptr18 to <16 x i32>* + %incdec.ptr17 = getelementptr i8, i8* %src%26 = bitcast i8* %incdec.ptr17 to <16 x i32>* + %incdec.ptr16 = getelementptr i8, i8* %add.ptr6%27 = bitcast i8* %incdec.ptr16 to <16 x i32>* + %incdec.ptr15 = getelementptr i8, i8* %add.ptr3%28 = bitcast i8* %incdec.ptr15 to <16 x i32>* + %incdec.ptr = getelementptr i8, i8* %add.ptr%29 = bitcast i8* %incdec.ptr to <16 x i32>* + br label %for.body +for.body: %optr.0166 = phi <16 x i32>* [ %incdec.ptr28, %for.body ], [ %7, %for.body.preheader ] + %iptr6.0165 = phi <16 x i32>* [ %incdec.ptr27, %for.body ], [ %23, %for.body.preheader ] + %iptr5.0164 = phi <16 x i32>* [ %incdec.ptr26, %for.body ], [ %24, %for.body.preheader ] + %iptr4.0163 = phi <16 x i32>* [ %incdec.ptr25, %for.body ], [ %25, %for.body.preheader ] + %iptr3.0162 = phi <16 x i32>* 
[ %incdec.ptr24, %for.body ], [ %26, %for.body.preheader ] + %iptr2.0161 = phi <16 x i32>* [ %incdec.ptr23, %for.body ], [ %27, %for.body.preheader ] + %iptr1.0160 = phi <16 x i32>* [ %incdec.ptr22, %for.body ], [ %28, %for.body.preheader ] + %iptr0.0159 = phi <16 x i32>* [ %incdec.ptr21, %for.body ], [ %29, %for.body.preheader ] + %dXV1.0158 = phi <32 x i32> [ %49, %for.body ], [ %22, %for.body.preheader ] + %dXV0.0157 = phi <32 x i32> [ %dXV1.0158, %for.body ], [ %16, %for.body.preheader ] + %i.0156 = phi i32 [ %sub, %for.body ], [ %width, %for.body.preheader ] + %incdec.ptr21 = getelementptr <16 x i32>, <16 x i32>* %iptr0.0159%30 = load <16 x i32>, <16 x i32>* %iptr0.0159%incdec.ptr22 = getelementptr <16 x i32>, <16 x i32>* %iptr1.0160%31 = load <16 x i32>, <16 x i32>* %iptr1.0160%incdec.ptr23 = getelementptr <16 x i32>, <16 x i32>* %iptr2.0161%32 = load <16 x i32>, <16 x i32>* %iptr2.0161%incdec.ptr24 = getelementptr <16 x i32>, <16 x i32>* %iptr3.0162%33 = load <16 x i32>, <16 x i32>* %iptr3.0162%incdec.ptr25 = getelementptr <16 x i32>, <16 x i32>* %iptr4.0163%34 = load <16 x i32>, <16 x i32>* %iptr4.0163%incdec.ptr26 = getelementptr <16 x i32>, <16 x i32>* %iptr5.0164%35 = load <16 x i32>, <16 x i32>* %iptr5.0164%incdec.ptr27 = getelementptr <16 x i32>, <16 x i32>* %iptr6.0165%36 = load <16 x i32>, <16 x i32>* %iptr6.0165, !tbaa !8 + call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %dXV1.0158) + call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %dXV0.0157) + call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %37, <16 x i32> %38, i32 2) + call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %dXV1.0158) + call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %dXV0.0157) + call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %40, <16 x i32> %41, i32 2) + call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %37, <16 x i32> %38, i32 4) + call <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32> %36, <16 x i32> %30) + call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %35, <16 x 
i32> %31) + call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %44, <32 x i32> %45, i32 101058054) + call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %34, <16 x i32> %32) + call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %46, <32 x i32> %47, i32 252645135) + call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %48, <16 x i32> %33, i32 336860180) + call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %49) + call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %50, <16 x i32> %40, i32 2) + call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %49) + call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %52, <16 x i32> %37, i32 2) + call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %50, <16 x i32> %40, i32 4) + call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %37, <16 x i32> %39) + call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %55, <16 x i32> %40) + call <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32> %56, i32 252972820) + call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %51, <16 x i32> %40) + call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %58, <16 x i32> %37) + call <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32> %59, i32 252972820) + call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %53, <16 x i32> %43) + call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %51, <16 x i32> %42) + call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %61, <16 x i32> %62) + call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %57, <32 x i32> %63, i32 17170694) + call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %54, <16 x i32> %42) + call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %53, <16 x i32> %39) + call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %65, <16 x i32> %66) + call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %60, <32 x i32> %67, i32 17170694) + call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %64) + call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %64) + call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %69, <16 x i32> %70, i32 12) + call <16 x 
i32> @llvm.hexagon.V6.hi(<32 x i32> %68) + call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %68) + call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %72, <16 x i32> %73, i32 12) + call <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32> %74, <16 x i32> %71) + %incdec.ptr28 = getelementptr <16 x i32>, <16 x i32>* %1 + store <16 x i32> %75, <16 x i32>* %optr.0166%sub = add i32 %i.0156, -64 + %cmp = icmp sgt i32 %sub, 64 + br i1 %cmp, label %for.body, label %for.end +for.end: ret void +} +declare <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32>, i32) +declare <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32>, <32 x i32>, i32) +declare <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32>, <16 x i32>, i32) +declare <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32>, <16 x i32>) + +attributes #0 = { "correctly-rounded-divide-sqrt-fp-math""target-cpu"="hexagonv65" "target-features"="+hvx-length64b,+hvxv65,+v65,-long-calls" "unsafe-fp-math"} +!8 = !{!9, !9, i64 0} +!9 = !{!"omnipotent char", !10} +!10 = !{} +!14 = !{} +!19 = !{} +!24 = !{} + Index: llvm/test/MC/Hexagon/hvx-swapped-regpairs-alias-neg.s =================================================================== --- /dev/null +++ llvm/test/MC/Hexagon/hvx-swapped-regpairs-alias-neg.s @@ -0,0 +1,16 @@ +# RUN: not llvm-mc -arch=hexagon -mcpu=hexagonv67 -mhvx -filetype=asm %s 2>%t; FileCheck --implicit-check-not="error:" %s <%t + +{ + v1:0 = #0 + v0:1 = #0 +} +# CHECK: error: register `V1' modified more than once + +## Unused .tmp: +{ + v1.tmp = vmem(r0 + #3) + v0:1 = vaddw(v17:16, v17:16) +} + +# CHECK: warning: register `V1' used with `.tmp' but not used in the same packet + Index: llvm/test/MC/Hexagon/hvx-swapped-regpairs.s =================================================================== --- /dev/null +++ llvm/test/MC/Hexagon/hvx-swapped-regpairs.s @@ -0,0 +1,44 @@ +# RUN: llvm-mc -filetype=obj -arch=hexagon -mcpu=hexagonv67 -mhvx %s | llvm-objdump -d -mcpu=hexagonv67 -mhvx - | FileCheck %s +# RUN: not llvm-mc -arch=hexagon 
-mcpu=hexagonv65 -mhvx -filetype=asm %s 2>%t; FileCheck --check-prefix=CHECK-V65 --implicit-check-not="error:" %s <%t + +v1:0.w = vadd(v0.h, v1.h) // Normal +# CHECK: 1ca1c080 + +v0:1.w = vadd(v0.h, v1.h) // Swapped +# CHECK-NEXT: 1ca1c081 +# CHECK-V65: error: register pair `WR0' is not permitted for this architecture + +## Swapped use: +v1:0.w = vtmpy(v0:1.h,r0.b) +# CHECK-NEXT: 19a0c180 +# CHECK-V65: error: register pair `WR0' is not permitted for this architecture + +## Swapped def +v0:1 = v3:2 +# CHECK-NEXT: 1f42c3e1 { v0:1 = vcombine(v3,v2) } +# CHECK-V65: error: register pair `WR0' is not permitted for this architecture + +# Mapped instruction's swapped use: +v1:0 = v2:3 +# CHECK-NEXT: v1:0 = vcombine(v2,v3) +## No error for v65, this is now permitted! + +## .new producer from pair: +{ + v0:1 = vaddw(v0:1, v0:1) + if (!p0) vmem(r0+#0)=v0.new +} +# CHECK-NEXT: v0:1.w = vadd(v0:1.w,v0:1.w) +# CHECK-NEXT: if (!p0) vmem(r0+#0) = v0.new +# CHECK-V65: error: register pair `WR0' is not permitted for this architecture + +## Used .tmp, swapped use & def: +{ + v0.tmp = vmem(r0 + #3) + v2:3 = vaddw(v0:1, v0:1) +} +# CHECK-NEXT: 1c6141c3 { v2:3.w = vadd(v0:1.w,v0:1.w) +# CHECK-NEXT: v0.tmp = vmem(r0+#3) } +# CHECK-V65: error: register pair `WR0' is not permitted for this architecture +# CHECK-V65: error: register pair `WR1' is not permitted for this architecture +