diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -73,6 +73,14 @@
   }
 };
 
+enum {
+  /// Ignore COPY instructions when looking at operands.
+  /// Affects:
+  ///   GIM_RecordInsn
+  ///   GIM_CheckIsSameOperand
+  GIMSF_IgnoreCopies = 0x1,
+};
+
 enum {
   /// Begin a try-block to attempt a match and jump to OnFail if it is
   /// unsuccessful.
@@ -86,6 +94,16 @@
   /// failed match.
   GIM_Try,
 
+  /// Sets SelectorFlags (GIMSF_*) to alter instruction selection behaviour in
+  /// the current try-block.
+  /// Note that the change is propagated downwards to nested try-blocks, but
+  /// never to parent try-blocks. Flags are saved on a stack; new try-blocks
+  /// duplicate the latest stack entry, and when a try-block ends (is
+  /// rejected), the stack is popped once.
+  /// - Flag - SelectorFlag to set (GIMSF_*)
+  GIM_SetFlag,
+
   /// Switch over the opcode on the specified instruction
   /// - InsnID - Instruction ID
   /// - LowerBound - numerically minimum opcode supported
@@ -364,7 +382,8 @@
   /// reading from a specific operand.
   /// - InsnID - Instruction ID to modify
   /// - OldInsnID - Instruction ID to get the matched operand from
-  /// - OpIdx - Operand index in OldInsnID the render function should read from..
+  /// - OpIdx - Operand index in OldInsnID the render function should read
+  ///   from.
   /// - RendererFnID - Custom renderer function to call
   GIR_CustomOperandRenderer,
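
The GIM_SetFlag documentation above describes a small stack discipline for selector flags. The following standalone C++ sketch (illustrative only, it is not the MatchTable interpreter, and every name in it is local to the example) walks through that behaviour: a try-block inherits its parent's flags, GIM_SetFlag changes only the innermost entry, and rejecting a try-block pops exactly one entry.

// Standalone sketch of the SelectorFlags stack described for GIM_SetFlag.
// Illustration only, not the MatchTable interpreter.
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const uint64_t IgnoreCopies = 0x1;         // stands in for GIMSF_IgnoreCopies
  std::vector<uint64_t> SelectorFlags = {0}; // empty entry so back() is valid

  // GIM_Try: a new try-block duplicates the current flags.
  SelectorFlags.push_back(SelectorFlags.back());
  // GIM_SetFlag GIMSF_IgnoreCopies: only the innermost entry is modified.
  SelectorFlags.back() |= IgnoreCopies;
  assert(SelectorFlags.back() & IgnoreCopies);

  // A nested try-block inherits the flag from its parent...
  SelectorFlags.push_back(SelectorFlags.back());
  assert(SelectorFlags.back() & IgnoreCopies);

  // ...and every reject pops exactly one entry.
  SelectorFlags.pop_back(); // inner try-block rejected
  SelectorFlags.pop_back(); // outer try-block rejected

  // Outside both try-blocks the flag was never set.
  assert(!(SelectorFlags.back() & IgnoreCopies));
  return 0;
}
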
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
@@ -59,20 +59,33 @@
   uint64_t CurrentIdx = 0;
   SmallVector<uint64_t, 8> OnFailResumeAt;
 
+  // Keep track of selector flags per try-block. Keep an empty value at the
+  // bottom of the stack so calling .back() is always valid.
+  SmallVector<uint64_t, 8> SelectorFlags = {0};
+
   // Bypass the flag check on the instruction, and only look at the MCInstrDesc.
   bool NoFPException = !State.MIs[0]->getDesc().mayRaiseFPException();
 
   const uint16_t Flags = State.MIs[0]->getFlags();
 
   enum RejectAction { RejectAndGiveUp, RejectAndResume };
+
+  auto handleTryBlock = [&](uint64_t ResumeAt) {
+    OnFailResumeAt.push_back(ResumeAt);
+    SelectorFlags.push_back(SelectorFlags.back());
+  };
+
   auto handleReject = [&]() -> RejectAction {
     DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
                     dbgs() << CurrentIdx << ": Rejected\n");
     if (OnFailResumeAt.empty())
       return RejectAndGiveUp;
     CurrentIdx = OnFailResumeAt.pop_back_val();
+    (void)SelectorFlags.pop_back_val();
     DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
-                    dbgs() << CurrentIdx << ": Resume at " << CurrentIdx << " ("
+                    dbgs() << CurrentIdx << ": Resume at " << CurrentIdx
+                           << " (SelectorFlags: "
+                           << format_hex(SelectorFlags.back(), 8) << ", "
                            << OnFailResumeAt.size() << " try-blocks remain)\n");
     return RejectAndResume;
   };
@@ -97,10 +110,20 @@
     case GIM_Try: {
       DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
                       dbgs() << CurrentIdx << ": Begin try-block\n");
-      OnFailResumeAt.push_back(MatchTable[CurrentIdx++]);
+      handleTryBlock(MatchTable[CurrentIdx++]);
+      break;
+    }
+    case GIM_SetFlag: {
+      uint64_t Value = uint64_t(MatchTable[CurrentIdx++]);
+      DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+                      dbgs() << CurrentIdx << ": GIM_SetFlag "
+                             << format_hex(Value, 8) << ": "
+                             << format_hex(SelectorFlags.back(), 8) << " -> ");
+      SelectorFlags.back() |= Value;
+      DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+                      dbgs() << format_hex(SelectorFlags.back(), 8) << "\n");
       break;
     }
-
     case GIM_RecordInsn: {
       int64_t NewInsnID = MatchTable[CurrentIdx++];
       int64_t InsnID = MatchTable[CurrentIdx++];
@@ -126,7 +149,12 @@
         break;
       }
 
-      MachineInstr *NewMI = MRI.getVRegDef(MO.getReg());
+      MachineInstr *NewMI;
+      if (SelectorFlags.back() & GIMSF_IgnoreCopies)
+        NewMI = getDefIgnoringCopies(MO.getReg(), MRI);
+      else
+        NewMI = MRI.getVRegDef(MO.getReg());
+
       if ((size_t)NewInsnID < State.MIs.size())
         State.MIs[NewInsnID] = NewMI;
       else {
@@ -203,7 +231,7 @@
         CurrentIdx = Default;
         break;
       }
-      OnFailResumeAt.push_back(Default);
+      handleTryBlock(Default);
       break;
     }
 
@@ -247,7 +275,7 @@
         CurrentIdx = Default;
         break;
       }
-      OnFailResumeAt.push_back(Default);
+      handleTryBlock(Default);
      break;
     }
 
@@ -827,8 +855,20 @@
                              << OtherInsnID << "][" << OtherOpIdx << "])\n");
       assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
       assert(State.MIs[OtherInsnID] != nullptr && "Used insn before defined");
-      if (!State.MIs[InsnID]->getOperand(OpIdx).isIdenticalTo(
-              State.MIs[OtherInsnID]->getOperand(OtherOpIdx))) {
+
+      MachineOperand &Op = State.MIs[InsnID]->getOperand(OpIdx);
+      MachineOperand &OtherOp = State.MIs[OtherInsnID]->getOperand(OtherOpIdx);
+
+      if (SelectorFlags.back() & GIMSF_IgnoreCopies) {
+        if (Op.isReg() && OtherOp.isReg()) {
+          MachineInstr *MI = getDefIgnoringCopies(Op.getReg(), MRI);
+          MachineInstr *OtherMI = getDefIgnoringCopies(OtherOp.getReg(), MRI);
+          if (MI && MI == OtherMI)
+            break;
+        }
+      }
+
+      if (!Op.isIdenticalTo(OtherOp)) {
         if (handleReject() == RejectAndGiveUp)
           return false;
       }
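
The new GIM_CheckIsSameOperand path above treats two register operands as equal when they resolve to the same definition once copies are skipped, which is what getDefIgnoringCopies provides in GlobalISel. A toy model of that idea (the struct, map, and helper below are illustrative and not the LLVM API), using the common case of %1 = COPY %0:

// Toy model of "ignore copies" operand comparison. Illustration only.
#include <cassert>
#include <map>

struct Instr {
  enum Kind { Copy, And } K;
  int Src = -1; // source register, only meaningful for Copy
};

// Follow COPY definitions until a non-copy def is found.
const Instr *lookThroughCopies(int Reg, const std::map<int, Instr> &Defs) {
  auto It = Defs.find(Reg);
  while (It != Defs.end() && It->second.K == Instr::Copy)
    It = Defs.find(It->second.Src);
  return It == Defs.end() ? nullptr : &It->second;
}

int main() {
  // %0 = AND ...
  // %1 = COPY %0
  std::map<int, Instr> Defs = {{0, {Instr::And}}, {1, {Instr::Copy, 0}}};

  // %0 and %1 are different registers, so a plain identity check on the
  // operands fails, but both resolve to the same AND once copies are skipped.
  assert(lookThroughCopies(0, Defs) == lookThroughCopies(1, Defs));
  return 0;
}
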
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -1854,6 +1854,10 @@
   list<dag> ResultInstrs = resultInstrs;
   list<Predicate> Predicates = [];  // See class Instruction in Target.td.
   int AddedComplexity = 0;  // See class Instruction in Target.td.
+
+  // When this is set, the GISel matcher will look through COPY instructions
+  // before checking opcodes for instruction operands.
+  bit GISelIgnoreCopies = ?;
 }
 
 // Pat - A simple (but common) form of a pattern, which produces a simple result
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2043,53 +2043,55 @@
   }]
 >;
 
-// Definition from ISA doc:
-//   (y & x) | (z & ~x)
-def : AMDGPUPat <
-  (DivergentBinFrag<or> (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
-  (V_BFI_B32_e64 (COPY_TO_REGCLASS VSrc_b32:$x, VGPR_32),
-                 (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32),
-                 (COPY_TO_REGCLASS VSrc_b32:$z, VGPR_32))
->;
+let GISelIgnoreCopies = 1 in {
+  // Definition from ISA doc:
+  //   (y & x) | (z & ~x)
+  def : AMDGPUPat <
+    (DivergentBinFrag<or> (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
+    (V_BFI_B32_e64 (COPY_TO_REGCLASS VSrc_b32:$x, VGPR_32),
+                   (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32),
+                   (COPY_TO_REGCLASS VSrc_b32:$z, VGPR_32))
+  >;
 
-// (y & C) | (z & ~C)
-def : AMDGPUPat <
-  (BFIImm32 i32:$x, i32:$y, i32:$z),
-  (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
->;
+  // (y & C) | (z & ~C)
+  def : AMDGPUPat <
+    (BFIImm32 i32:$x, i32:$y, i32:$z),
+    (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
+  >;
 
-// 64-bit version
-def : AMDGPUPat <
-  (DivergentBinFrag<or> (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
-  (REG_SEQUENCE VReg_64,
-    (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
-                   (i32 (EXTRACT_SUBREG VReg_64:$y, sub0)),
-                   (i32 (EXTRACT_SUBREG VReg_64:$z, sub0))), sub0,
-    (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
-                   (i32 (EXTRACT_SUBREG VReg_64:$y, sub1)),
-                   (i32 (EXTRACT_SUBREG VReg_64:$z, sub1))), sub1)
->;
+  // 64-bit version
+  def : AMDGPUPat <
+    (DivergentBinFrag<or> (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
+    (REG_SEQUENCE VReg_64,
+      (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
+                     (i32 (EXTRACT_SUBREG VReg_64:$y, sub0)),
+                     (i32 (EXTRACT_SUBREG VReg_64:$z, sub0))), sub0,
+      (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
+                     (i32 (EXTRACT_SUBREG VReg_64:$y, sub1)),
+                     (i32 (EXTRACT_SUBREG VReg_64:$z, sub1))), sub1)
+  >;
 
-// SHA-256 Ch function
-// z ^ (x & (y ^ z))
-def : AMDGPUPat <
-  (DivergentBinFrag<xor> i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
-  (V_BFI_B32_e64 (COPY_TO_REGCLASS VSrc_b32:$x, VGPR_32),
-                 (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32),
-                 (COPY_TO_REGCLASS VSrc_b32:$z, VGPR_32))
->;
+  // SHA-256 Ch function
+  // z ^ (x & (y ^ z))
+  def : AMDGPUPat <
+    (DivergentBinFrag<xor> i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
+    (V_BFI_B32_e64 (COPY_TO_REGCLASS VSrc_b32:$x, VGPR_32),
+                   (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32),
+                   (COPY_TO_REGCLASS VSrc_b32:$z, VGPR_32))
+  >;
 
-// 64-bit version
-def : AMDGPUPat <
-  (DivergentBinFrag<xor> i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
-  (REG_SEQUENCE VReg_64,
-    (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
-                   (i32 (EXTRACT_SUBREG VReg_64:$y, sub0)),
-                   (i32 (EXTRACT_SUBREG VReg_64:$z, sub0))), sub0,
-    (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
-                   (i32 (EXTRACT_SUBREG VReg_64:$y, sub1)),
-                   (i32 (EXTRACT_SUBREG VReg_64:$z, sub1))), sub1)
->;
+  // 64-bit version
+  def : AMDGPUPat <
+    (DivergentBinFrag<xor> i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
+    (REG_SEQUENCE VReg_64,
+      (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
+                     (i32 (EXTRACT_SUBREG VReg_64:$y, sub0)),
+                     (i32 (EXTRACT_SUBREG VReg_64:$z, sub0))), sub0,
+      (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
+                     (i32 (EXTRACT_SUBREG VReg_64:$y, sub1)),
+                     (i32 (EXTRACT_SUBREG VReg_64:$z, sub1))), sub1)
+  >;
+}
 
 def : AMDGPUPat <
   (fcopysign f32:$src0, f32:$src1),
@@ -3189,28 +3191,30 @@
 // SHA-256 Ma patterns
 
 // ((x & z) | (y & (x | z))) -> BFI (XOR x, y), z, y
-def : AMDGPUPat <
-  (DivergentBinFrag<or> (and i32:$x, i32:$z),
-                        (and i32:$y, (or i32:$x, i32:$z))),
-  (V_BFI_B32_e64 (V_XOR_B32_e64 (COPY_TO_REGCLASS VSrc_b32:$x, VGPR_32),
-                                (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32)),
-                 (COPY_TO_REGCLASS VSrc_b32:$z, VGPR_32),
-                 (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32))
->;
+let GISelIgnoreCopies = 1 in {
+  def : AMDGPUPat <
+    (DivergentBinFrag<or> (and i32:$x, i32:$z),
+                          (and i32:$y, (or i32:$x, i32:$z))),
+    (V_BFI_B32_e64 (V_XOR_B32_e64 (COPY_TO_REGCLASS VSrc_b32:$x, VGPR_32),
+                                  (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32)),
+                   (COPY_TO_REGCLASS VSrc_b32:$z, VGPR_32),
+                   (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32))
+  >;
 
-def : AMDGPUPat <
-  (DivergentBinFrag<or> (and i64:$x, i64:$z),
-                        (and i64:$y, (or i64:$x, i64:$z))),
-  (REG_SEQUENCE VReg_64,
-    (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
-                                  (i32 (EXTRACT_SUBREG VReg_64:$y, sub0))),
-                   (i32 (EXTRACT_SUBREG VReg_64:$z, sub0)),
-                   (i32 (EXTRACT_SUBREG VReg_64:$y, sub0))), sub0,
-    (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
-                                  (i32 (EXTRACT_SUBREG VReg_64:$y, sub1))),
-                   (i32 (EXTRACT_SUBREG VReg_64:$z, sub1)),
-                   (i32 (EXTRACT_SUBREG VReg_64:$y, sub1))), sub1)
->;
+  def : AMDGPUPat <
+    (DivergentBinFrag<or> (and i64:$x, i64:$z),
+                          (and i64:$y, (or i64:$x, i64:$z))),
+    (REG_SEQUENCE VReg_64,
+      (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
+                                    (i32 (EXTRACT_SUBREG VReg_64:$y, sub0))),
+                     (i32 (EXTRACT_SUBREG VReg_64:$z, sub0)),
+                     (i32 (EXTRACT_SUBREG VReg_64:$y, sub0))), sub0,
+      (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
+                                    (i32 (EXTRACT_SUBREG VReg_64:$y, sub1))),
+                     (i32 (EXTRACT_SUBREG VReg_64:$z, sub1)),
+                     (i32 (EXTRACT_SUBREG VReg_64:$y, sub1))), sub1)
+  >;
+}
 
 multiclass IntMed3Pat<
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
   virtual std::unique_ptr<PredicateMatcher> popFirstCondition() = 0;
+
+  void setIgnoreCopies(bool Value = true) { IgnoreCopies = Value; }
+  bool getIgnoreCopies() const { return IgnoreCopies; }
+
+  void emitSetFlags(MatchTable &Table) {
+    // Note: If more flags are added, we can just emit one SetFlag and & the
+    // flags together.
+    if (IgnoreCopies) {
+      Table << MatchTable::Opcode("GIM_SetFlag")
+            << MatchTable::NamedValue("GIMSF_IgnoreCopies")
+            << MatchTable::LineBreak;
+    }
+  }
 };
 
 MatchTable MatchTable::buildTable(ArrayRef<Matcher *> Rules,
@@ -3388,6 +3403,7 @@
         << MatchTable::JumpTarget(LabelID)
         << MatchTable::Comment(("Rule ID " + Twine(RuleID) + " //").str())
         << MatchTable::LineBreak;
+  emitSetFlags(Table);
 
   if (!RequiredFeatures.empty()) {
     Table << MatchTable::Opcode("GIM_CheckFeatures")
@@ -5211,6 +5227,13 @@
   // before their first use.)
   InstructionMatcher &InsnMatcherTemp = M.addInstructionMatcher(Src->getName());
   unsigned TempOpIdx = 0;
+
+  if (P.getSrcRecord()->isSubClassOf("Pattern")) {
+    bool Unset = false;
+    M.setIgnoreCopies(
+        P.getSrcRecord()->getValueAsBitOrUnset("GISelIgnoreCopies", Unset));
+  }
+
   auto InsnMatcherOrError =
       createAndImportSelDAGMatcher(M, InsnMatcherTemp, Src, TempOpIdx);
   if (auto Error = InsnMatcherOrError.takeError())
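
The import above reads the bit with getValueAsBitOrUnset, so a pattern that leaves the default `bit GISelIgnoreCopies = ?` untouched behaves the same as setting it to 0. A small sketch of that lookup behaviour, with std::optional standing in for the bit-or-unset out-parameter (names are illustrative; this is not the TableGen API):

// Sketch of the assumed bit-or-unset semantics: an unset bit behaves like 0.
#include <cassert>
#include <optional>

bool importIgnoreCopies(std::optional<bool> RecordValue) {
  bool Unset = !RecordValue.has_value(); // mirrors the `bool Unset` out-param
  return Unset ? false : *RecordValue;   // unset bit leaves the flag off
}

int main() {
  assert(importIgnoreCopies(std::nullopt) == false); // bit left as `?`
  assert(importIgnoreCopies(true) == true);          // let GISelIgnoreCopies = 1
  assert(importIgnoreCopies(false) == false);        // explicitly disabled
  return 0;
}
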
@@ -5671,6 +5694,20 @@
     if (T != E)
       F = ++T;
   }
+
+  // Optimize flags: If all RuleMatchers have the same flag set, unset it on
+  // the rules and set it here instead so GIM_SetFlag is emitted only once.
+  if (!Matchers.empty()) {
+    const bool AllHaveIgnoreCopies =
+        all_of(Matchers, [&](Matcher *M) { return M->getIgnoreCopies(); });
+
+    if (AllHaveIgnoreCopies) {
+      setIgnoreCopies();
+      for (Matcher *M : Matchers)
+        M->setIgnoreCopies(false);
+    }
+  }
+
   GlobalISelEmitter::optimizeRules<SwitchMatcher>(Matchers, MatcherStorage)
       .swap(Matchers);
   GlobalISelEmitter::optimizeRules<SwitchMatcher>(Matchers, MatcherStorage)
@@ -6159,6 +6196,7 @@
     Table << MatchTable::Opcode("GIM_Try", +1)
           << MatchTable::Comment("On fail goto")
           << MatchTable::JumpTarget(LabelID) << MatchTable::LineBreak;
+    emitSetFlags(Table);
   }
 
   for (auto &Condition : Conditions)
     Condition->emitPredicateOpcodes(
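
The flag-hoisting hunk above avoids emitting GIM_SetFlag once per rule when every rule in a group already sets GIMSF_IgnoreCopies. A minimal standalone model of that optimization, assuming simplified Rule/Group types rather than the emitter's real Matcher hierarchy:

// Minimal model of the flag-hoisting step. Illustration only.
#include <algorithm>
#include <cassert>
#include <vector>

struct Rule {
  bool IgnoreCopies = false;
};

struct Group {
  bool IgnoreCopies = false;
  std::vector<Rule> Rules;

  // If every rule sets the flag, set it once on the group and clear it on
  // the rules, so only a single GIM_SetFlag would be emitted.
  void hoistIgnoreCopies() {
    if (Rules.empty())
      return;
    bool AllSet = std::all_of(Rules.begin(), Rules.end(),
                              [](const Rule &R) { return R.IgnoreCopies; });
    if (!AllSet)
      return;
    IgnoreCopies = true;
    for (Rule &R : Rules)
      R.IgnoreCopies = false;
  }
};

int main() {
  Group G;
  G.Rules = {Rule{true}, Rule{true}, Rule{true}};
  G.hoistIgnoreCopies();
  assert(G.IgnoreCopies);
  assert(!G.Rules.front().IgnoreCopies && !G.Rules.back().IgnoreCopies);
  return 0;
}
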