Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -10373,24 +10373,6 @@ Ops.push_back(ImpDef.getValue(1)); return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); } - case AMDGPU::V_PERMLANE16_B32: - case AMDGPU::V_PERMLANEX16_B32: { - ConstantSDNode *FI = cast(Node->getOperand(0)); - ConstantSDNode *BC = cast(Node->getOperand(2)); - if (!FI->getZExtValue() && !BC->getZExtValue()) - break; - SDValue VDstIn = Node->getOperand(6); - if (VDstIn.isMachineOpcode() - && VDstIn.getMachineOpcode() == AMDGPU::IMPLICIT_DEF) - break; - MachineSDNode *ImpDef = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, - SDLoc(Node), MVT::i32); - SmallVector Ops = { SDValue(FI, 0), Node->getOperand(1), - SDValue(BC, 0), Node->getOperand(3), - Node->getOperand(4), Node->getOperand(5), - SDValue(ImpDef, 0), Node->getOperand(7) }; - return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); - } default: break; } Index: llvm/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -639,6 +639,34 @@ let HasOMod = 0; } +class PermlanePat : GCNPat< + (permlane i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, + timm:$fi, timm:$bc), + (inst (as_i1imm $fi), $src0, (as_i1imm $bc), + $src1, 0, $src2, $vdst_in) +>; + +// Permlane intrinsic that has either fetch invalid or bound control +// fields enabled. +class BoundControlOrFetchInvalidPermlane : + PatFrag<(ops node:$vdst_in, node:$src0, node:$src1, node:$src2, + node:$fi, node:$bc), + (permlane node:$vdst_in, node:$src0, node: + $src1, node:$src2, node:$fi, node:$bc)> { + let PredicateCode = [{ return N->getConstantOperandVal(5) != 0 || + N->getConstantOperandVal(6) != 0; }]; +} + +// Drop the input value if it won't be read. +class PermlaneDiscardVDstIn : GCNPat< + (permlane srcvalue, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), + (inst (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, + (IMPLICIT_DEF)) +>; + + let SubtargetPredicate = isGFX10Plus in { def V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile>; def : ThreeOp_i32_Pats; @@ -648,14 +676,16 @@ def V_PERMLANEX16_B32 : VOP3Inst <"v_permlanex16_b32", VOP3_PERMLANE_Profile>; } // End $vdst = $vdst_in, DisableEncoding $vdst_in - def : GCNPat< - (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), - (V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in) - >; - def : GCNPat< - (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), - (V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in) - >; + def : PermlanePat; + def : PermlanePat; + + def : PermlaneDiscardVDstIn< + BoundControlOrFetchInvalidPermlane, + V_PERMLANE16_B32>; + def : PermlaneDiscardVDstIn< + BoundControlOrFetchInvalidPermlane, + V_PERMLANEX16_B32>; + } // End SubtargetPredicate = isGFX10Plus //===----------------------------------------------------------------------===//