Index: lib/Target/AMDGPU/AMDGPUGISel.td =================================================================== --- lib/Target/AMDGPU/AMDGPUGISel.td +++ lib/Target/AMDGPU/AMDGPUGISel.td @@ -18,6 +18,11 @@ GIComplexOperandMatcher, GIComplexPatternEquiv; +def sd_vcsrc : ComplexPattern; +def gi_vcsrc : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + def gi_vop3mods0 : GIComplexOperandMatcher, GIComplexPatternEquiv; @@ -60,6 +65,26 @@ (inst src0_vt:$src0, src1_vt:$src1) >; +class GISelVop3Pat2 < + SDPatternOperator node, + Instruction inst, + ValueType dst_vt, + ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt> : GCNPat < + + (dst_vt (node (src0_vt (sd_vcsrc src0_vt:$src0)), (src1_vt (sd_vcsrc src1_vt:$src1)))), + (inst src0_vt:$src0, src1_vt:$src1) +>; + +class GISelVop3Pat2CommutePat < + SDPatternOperator node, + Instruction inst, + ValueType dst_vt, + ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt> : GCNPat < + + (dst_vt (node (src0_vt (sd_vcsrc src0_vt:$src0)), (src1_vt (sd_vcsrc src1_vt:$src1)))), + (inst src0_vt:$src1, src1_vt:$src0) +>; + multiclass GISelVop2IntrPat < SDPatternOperator node, Instruction inst, ValueType dst_vt, ValueType src_vt = dst_vt> { @@ -76,6 +101,15 @@ def : GISelSop2Pat ; def : GISelVop2Pat ; +def : GISelSop2Pat ; +let AddedComplexity = 100 in { +let SubtargetPredicate = isSICI in { +def : GISelVop2Pat ; +} +def : GISelVop2CommutePat ; +} +def : GISelVop3Pat2CommutePat ; + // FIXME: Select directly to _e32 so we don't need to deal with modifiers. 
// FIXME: We can't re-use SelectionDAG patterns here because they match // against a custom SDNode and we would need to create a generic machine Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.h =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -74,6 +74,9 @@ bool selectG_LOAD(MachineInstr &I) const; bool selectG_STORE(MachineInstr &I) const; + InstructionSelector::ComplexRendererFns + selectVCSRC(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns selectVSRC0(MachineOperand &Root) const; Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -604,6 +604,7 @@ switch (I.getOpcode()) { default: break; + case TargetOpcode::G_ASHR: case TargetOpcode::G_SITOFP: case TargetOpcode::G_FMUL: case TargetOpcode::G_FADD: @@ -633,6 +634,14 @@ return false; } +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const { + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(Root); } + }}; + +} + /// /// This will select either an SGPR or VGPR operand and will save us from /// having to write an extra tablegen pattern. 
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -55,6 +55,7 @@ }; setAction({G_ADD, S32}, Legal); + setAction({G_ASHR, S32}, Legal); setAction({G_SUB, S32}, Legal); setAction({G_MUL, S32}, Legal); setAction({G_AND, S32}, Legal); Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir @@ -0,0 +1,86 @@ +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,SI +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,VI + +--- | + define void @ashr(i32 addrspace(1)* %global0) {ret void} +... +--- + +name: ashr +legalized: true +regBankSelected: true + +# GCN-LABEL: name: ashr +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4 + ; GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN: [[SGPR1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GCN: [[VGPR0:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:vgpr(s32) = COPY $vgpr0 + %3:vgpr(s64) = COPY $vgpr3_vgpr4 + + ; GCN: [[C1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1 + ; GCN: [[C4096:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096 + %4:sgpr(s32) = G_CONSTANT i32 1 + %5:sgpr(s32) = G_CONSTANT i32 4096 + + ; ashr ss + ; GCN: [[SS:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[SGPR0]], [[SGPR1]] + %6:sgpr(s32) = G_ASHR %0, %1 + + ; ashr si + ; GCN: [[SI:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[SS]], [[C1]] + %7:sgpr(s32) = G_ASHR %6, %4 + + ; ashr is + ; GCN: [[IS:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[C1]], [[SI]] + %8:sgpr(s32) = G_ASHR %4, %7 + + ; ashr sc + ; GCN: [[SC:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[IS]], [[C4096]] + %9:sgpr(s32) = G_ASHR 
%8, %5 + + ; ashr cs + ; GCN: [[CS:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[C4096]], [[SC]] + %10:sgpr(s32) = G_ASHR %5, %9 + + ; ashr vs + ; GCN: [[VS:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[CS]], [[VGPR0]] + %11:vgpr(s32) = G_ASHR %2, %10 + + ; ashr sv + ; SI: [[SV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[CS]], [[VS]] + ; VI: [[SV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[VS]], [[CS]] + %12:vgpr(s32) = G_ASHR %10, %11 + + ; ashr vv + ; SI: [[VV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[SV]], [[VGPR0]] + ; VI: [[VV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[VGPR0]], [[SV]] + %13:vgpr(s32) = G_ASHR %12, %2 + + ; ashr iv + ; SI: [[IV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[C1]], [[VV]] + ; VI: [[IV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[VV]], [[C1]] + %14:vgpr(s32) = G_ASHR %4, %13 + + ; ashr vi + ; GCN: [[VI:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[C1]], [[IV]] + %15:vgpr(s32) = G_ASHR %14, %4 + + ; ashr cv + ; SI: [[CV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[C4096]], [[VI]] + ; VI: [[CV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[VI]], [[C4096]] + %16:vgpr(s32) = G_ASHR %5, %15 + + ; ashr vc + ; GCN: [[VC:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[C4096]], [[CV]] + %17:vgpr(s32) = G_ASHR %16, %5 + + + G_STORE %17, %3 :: (store 4 into %ir.global0) + +... 
+--- Index: test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -0,0 +1,22 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s + +--- +name: test_ashr +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0.entry: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_ashr + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]] + %0(s32) = COPY $vgpr0 + %1(s32) = COPY $vgpr1 + %2(s32) = G_ASHR %0, %1 + $vgpr0 = COPY %2 +...