Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -1137,18 +1137,28 @@ (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit >; -} // End let AddedComplexity = 1 +// FIXME: The implicit-def of scc from S_[X]OR_B32 is mishandled + // def : GCNPat < +// (fneg (f64 SReg_64:$src)), +// (REG_SEQUENCE SReg_64, +// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), +// sub0, +// (S_XOR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), +// (i32 (S_MOV_B32 (i32 0x80000000)))), +// sub1) +// >; + +// def : GCNPat < +// (fneg (fabs (f64 SReg_64:$src))), +// (REG_SEQUENCE SReg_64, +// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), +// sub0, +// (S_OR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), +// (S_MOV_B32 (i32 0x80000000))), // Set sign bit. +// sub1) +// >; -// FIXME: Should use S_OR_B32 -def : GCNPat < - (fneg (fabs f64:$src)), - (REG_SEQUENCE VReg_64, - (i32 (EXTRACT_SUBREG f64:$src, sub0)), - sub0, - (V_OR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)), - (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit. - sub1) ->; +} // End let AddedComplexity = 1 def : GCNPat < (fabs (f32 VGPR_32:$src)), @@ -1190,16 +1200,28 @@ sub1) >; +// TODO: Use SGPR for constant def : GCNPat < - (fneg f64:$src), + (fneg (f64 VReg_64:$src)), (REG_SEQUENCE VReg_64, - (i32 (EXTRACT_SUBREG f64:$src, sub0)), + (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)), sub0, - (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)), + (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)), (i32 (V_MOV_B32_e32 (i32 0x80000000)))), sub1) >; +// TODO: Use SGPR for constant +def : GCNPat < + (fneg (fabs (f64 VReg_64:$src))), + (REG_SEQUENCE VReg_64, + (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)), + sub0, + (V_OR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)), + (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit. + sub1) +>; + def : GCNPat < (fcopysign f16:$src0, f16:$src1), (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1) Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir +++ test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir @@ -219,9 +219,13 @@ liveins: $vgpr0_vgpr1 ; GCN-LABEL: name: fabs_s64_vv ; GCN: liveins: $vgpr0_vgpr1 - ; GCN: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GCN: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]] - ; GCN: $vgpr0_vgpr1 = COPY [[FABS]](s64) + ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 + ; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FABS %0 $vgpr0_vgpr1 = COPY %1 Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir +++ test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir @@ -220,9 +220,13 @@ liveins: $vgpr0_vgpr1 ; GCN-LABEL: name: fneg_s64_vv ; GCN: liveins: $vgpr0_vgpr1 - ; GCN: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GCN: [[FNEG:%[0-9]+]]:vreg_64(s64) = G_FNEG [[COPY]] - ; GCN: $vgpr0_vgpr1 = COPY [[FNEG]](s64) + ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483648, implicit $exec + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e32_]], %subreg.sub1 + ; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FNEG %0 $vgpr0_vgpr1 = COPY %1 @@ -481,10 +485,13 @@ liveins: $vgpr0_vgpr1 ; GCN-LABEL: name: fneg_fabs_s64_vv ; GCN: liveins: $vgpr0_vgpr1 - ; GCN: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GCN: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] - ; GCN: [[FNEG:%[0-9]+]]:vreg_64(s64) = G_FNEG [[FABS]] - ; GCN: $vgpr0_vgpr1 = COPY [[FNEG]](s64) + ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483648, implicit $exec + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GCN: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e32_]], %subreg.sub1 + ; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FABS %0 %2:vgpr(s64) = G_FNEG %1 @@ -503,9 +510,13 @@ ; GCN-LABEL: name: fneg_fabs_s64_vs ; GCN: liveins: $sgpr0_sgpr1 ; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; GCN: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] - ; GCN: [[FNEG:%[0-9]+]]:vreg_64(s64) = G_FNEG [[FABS]] - ; GCN: $vgpr0_vgpr1 = COPY [[FNEG]](s64) + ; GCN: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]] + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 2147483648, implicit $exec + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub1(s64) + ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e32 [[COPY1]](s32), [[V_MOV_B32_e32_]](s32), implicit $exec + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64) + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e32_]](s16), %subreg.sub1 + ; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]](s64) %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_FABS %0 %2:vgpr(s64) = G_FNEG %1 Index: test/TableGen/GlobalISelEmitterRegSequence.td =================================================================== --- /dev/null +++ test/TableGen/GlobalISelEmitterRegSequence.td @@ -0,0 +1,62 @@ +// RUN: llvm-tblgen %s -gen-global-isel -optimize-match-table=false -I %p/../../include -I %p/Common -o - | FileCheck %s + +include "llvm/Target/Target.td" +include "GlobalISelEmitterCommon.td" + +// Boilerplate code for setting up some registers with subregs. +class MyReg subregs = []> + : Register { + let SubRegs = subregs; +} + +class MyClass types, dag registers> + : RegisterClass<"Test", types, size, registers> { + let Size = size; +} + +def sub0 : SubRegIndex<16>; +def sub1 : SubRegIndex<16, 16>; +def S0 : MyReg<"s0">; +def S1 : MyReg<"s1">; +def SRegs : MyClass<16, [i16], (sequence "S%u", 0, 1)>; + +let SubRegIndices = [sub0, sub1] in { +def D0 : MyReg<"d0", [S0, S1]>; +} + +def DRegs : MyClass<32, [i32], (sequence "D%u", 0, 0)>; +def SOP : RegisterOperand; +def DOP : RegisterOperand; +def SOME_INSN : I<(outs DRegs:$dst), (ins DOP:$src), []>; +def SUBSOME_INSN : I<(outs SRegs:$dst), (ins SOP:$src), []>; + +// CHECK: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2, +// CHECK-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_SEXT, +// CHECK-NEXT: // MIs[0] dst +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/0, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/0, /*RC*/Test::DRegsRegClassID, +// CHECK-NEXT: // MIs[0] src +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s16, +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/1, /*RC*/Test::SRegsRegClassID, +// CHECK-NEXT: // (sext:{ *:[i32] } SOP:{ *:[i16] }:$src) => (REG_SEQUENCE:{ *:[i32] } DRegs:{ *:[i32] }, (SUBSOME_INSN:{ *:[i16] } SOP:{ *:[i16] }:$src), sub0:{ *:[i32] }, (SUBSOME_INSN:{ *:[i16] } SOP:{ *:[i16] }:$src), sub1:{ *:[i32] }) +// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s16, +// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/1, /*TypeID*/GILLT_s16, +// CHECK-NEXT: GIR_BuildMI, /*InsnID*/2, /*Opcode*/MyTarget::SUBSOME_INSN, +// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/2, /*TempRegID*/1, /*TempRegFlags*/RegState::Define, +// CHECK-NEXT: GIR_Copy, /*NewInsnID*/2, /*OldInsnID*/0, /*OpIdx*/1, // src +// CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/2, +// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/MyTarget::SUBSOME_INSN, +// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/RegState::Define, +// CHECK-NEXT: GIR_Copy, /*NewInsnID*/1, /*OldInsnID*/0, /*OpIdx*/1, // src +// CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/1, +// CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::REG_SEQUENCE, +// CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst +// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/0, /*TempRegFlags*/0, +// CHECK-NEXT: GIR_AddImm, /*InsnID*/0, /*SubRegIndex*/1, +// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/1, /*TempRegFlags*/0, +// CHECK-NEXT: GIR_AddImm, /*InsnID*/0, /*SubRegIndex*/2, +// CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, +// CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/0, +def : Pat<(i32 (sext SOP:$src)), + (REG_SEQUENCE DRegs, (SUBSOME_INSN SOP:$src), sub0, + (SUBSOME_INSN SOP:$src), sub1)>; Index: utils/TableGen/GlobalISelEmitter.cpp =================================================================== --- utils/TableGen/GlobalISelEmitter.cpp +++ utils/TableGen/GlobalISelEmitter.cpp @@ -2269,6 +2269,7 @@ OR_CopyConstantAsImm, OR_CopyFConstantAsFPImm, OR_Imm, + OR_SubRegIndex, OR_Register, OR_TempRegister, OR_ComplexPattern, @@ -2540,6 +2541,28 @@ } }; +/// Adds an enum value for a subreg index to the instruction being built. +class SubRegIndexRenderer : public OperandRenderer { +protected: + unsigned InsnID; + const CodeGenSubRegIndex *SubRegIdx; + +public: + SubRegIndexRenderer(unsigned InsnID, const CodeGenSubRegIndex *SRI) + : OperandRenderer(OR_SubRegIndex), InsnID(InsnID), SubRegIdx(SRI) {} + + static bool classof(const OperandRenderer *R) { + return R->getKind() == OR_SubRegIndex; + } + + void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIR_AddImm") << MatchTable::Comment("InsnID") + << MatchTable::IntValue(InsnID) << MatchTable::Comment("SubRegIndex") + << MatchTable::IntValue(SubRegIdx->EnumValue) + << MatchTable::LineBreak; + } +}; + /// Adds operands by calling a renderer function supplied by the ComplexPattern /// matcher function. class RenderComplexPatternOperand : public OperandRenderer { @@ -2820,7 +2843,9 @@ public: MakeTempRegisterAction(const LLTCodeGen &Ty, unsigned TempRegID) - : Ty(Ty), TempRegID(TempRegID) {} + : Ty(Ty), TempRegID(TempRegID) { + KnownTypes.insert(Ty); + } void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { Table << MatchTable::Opcode("GIR_MakeTempReg") @@ -4026,12 +4051,9 @@ // COPY_TO_REGCLASS is just a copy with a ConstrainOperandToRegClassAction // attached. Similarly for EXTRACT_SUBREG except that's a subregister copy. - if (DstI->TheDef->getName() == "COPY_TO_REGCLASS") - DstI = &Target.getInstruction(RK.getDef("COPY")); - else if (DstI->TheDef->getName() == "EXTRACT_SUBREG") + StringRef Name = DstI->TheDef->getName(); + if (Name == "COPY_TO_REGCLASS" || Name == "EXTRACT_SUBREG") DstI = &Target.getInstruction(RK.getDef("COPY")); - else if (DstI->TheDef->getName() == "REG_SEQUENCE") - return failedImport("Unable to emit REG_SEQUENCE"); return M.insertAction(InsertPt, M.allocateOutputInsnID(), DstI); @@ -4052,8 +4074,11 @@ const CodeGenInstruction *DstI = DstMIBuilder.getCGI(); CodeGenInstruction *OrigDstI = &Target.getInstruction(Dst->getOperator()); + StringRef Name = OrigDstI->TheDef->getName(); + unsigned ExpectedDstINumUses = Dst->getNumChildren(); + // EXTRACT_SUBREG needs to use a subregister COPY. - if (OrigDstI->TheDef->getName() == "EXTRACT_SUBREG") { + if (Name == "EXTRACT_SUBREG") { if (!Dst->getChild(0)->isLeaf()) return failedImport("EXTRACT_SUBREG child #1 is not a leaf"); @@ -4083,10 +4108,42 @@ return failedImport("EXTRACT_SUBREG child #1 is not a subreg index"); } + if (Name == "REG_SEQUENCE") { + if (!Dst->getChild(0)->isLeaf()) + return failedImport("REG_SEQUENCE child #0 is not a leaf"); + + Record *RCDef = getInitValueAsRegClass(Dst->getChild(0)->getLeafValue()); + if (!RCDef) { + return failedImport("REG_SEQUENCE child #0 could not " + "be coerced to a register class"); + } + + if ((ExpectedDstINumUses - 1) % 2 != 0) + return failedImport("Malformed REG_SEQUENCE"); + + for (unsigned I = 1; I != ExpectedDstINumUses; I += 2) { + TreePatternNode *ValChild = Dst->getChild(I); + TreePatternNode *SubRegChild = Dst->getChild(I + 1); + + if (DefInit *SubRegInit = + dyn_cast(SubRegChild->getLeafValue())) { + CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef()); + + auto InsertPtOrError = + importExplicitUseRenderer(InsertPt, M, DstMIBuilder, ValChild); + if (auto Error = InsertPtOrError.takeError()) + return std::move(Error); + InsertPt = InsertPtOrError.get(); + DstMIBuilder.addRenderer(SubIdx); + } + } + + return InsertPt; + } + // Render the explicit uses. unsigned DstINumUses = OrigDstI->Operands.size() - OrigDstI->Operands.NumDefs; - unsigned ExpectedDstINumUses = Dst->getNumChildren(); - if (OrigDstI->TheDef->getName() == "COPY_TO_REGCLASS") { + if (Name == "COPY_TO_REGCLASS") { DstINumUses--; // Ignore the class constraint. ExpectedDstINumUses--; }