Index: llvm/include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1156,7 +1156,7 @@ llvm_i1_ty, // done llvm_i1_ty // vm ], - [ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>, IntrInaccessibleMemOnly] + [ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>, IntrWriteMem, IntrInaccessibleMemOnly] >; // exp with compr bit set. @@ -1167,7 +1167,7 @@ LLVMMatchType<0>, // src1 llvm_i1_ty, // done llvm_i1_ty], // vm - [ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>, IntrInaccessibleMemOnly] + [ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>, IntrWriteMem, IntrInaccessibleMemOnly] >; def int_amdgcn_buffer_wbinvl1_sc : Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -433,8 +433,6 @@ MUL_LOHI_U24, PERM, TEXTURE_FETCH, - EXPORT, // exp on SI+ - EXPORT_DONE, // exp on SI+ with done bit set R600_EXPORT, CONST_ADDRESS, REGISTER_LOAD, Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4275,8 +4275,6 @@ NODE_NAME_CASE(MAD_U64_U32) NODE_NAME_CASE(PERM) NODE_NAME_CASE(TEXTURE_FETCH) - NODE_NAME_CASE(EXPORT) - NODE_NAME_CASE(EXPORT_DONE) NODE_NAME_CASE(R600_EXPORT) NODE_NAME_CASE(CONST_ADDRESS) NODE_NAME_CASE(REGISTER_LOAD) Index: llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -358,12 +358,6 @@ ]>; -def AMDGPUexport: SDNode<"AMDGPUISD::EXPORT", AMDGPUExportOp, - [SDNPHasChain, SDNPMayStore]>; - -def AMDGPUexport_done: SDNode<"AMDGPUISD::EXPORT_DONE", AMDGPUExportOp, - [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; - def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6752,52 +6752,29 @@ MachineFunction &MF = DAG.getMachineFunction(); switch (IntrinsicID) { - case Intrinsic::amdgcn_exp: { - const ConstantSDNode *Tgt = cast(Op.getOperand(2)); - const ConstantSDNode *En = cast(Op.getOperand(3)); - const ConstantSDNode *Done = cast(Op.getOperand(8)); - const ConstantSDNode *VM = cast(Op.getOperand(9)); - - const SDValue Ops[] = { - Chain, - DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt - DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en - Op.getOperand(4), // src0 - Op.getOperand(5), // src1 - Op.getOperand(6), // src2 - Op.getOperand(7), // src3 - DAG.getTargetConstant(0, DL, MVT::i1), // compr - DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1) - }; - - unsigned Opc = Done->isNullValue() ? - AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE; - return DAG.getNode(Opc, DL, Op->getVTList(), Ops); - } case Intrinsic::amdgcn_exp_compr: { - const ConstantSDNode *Tgt = cast(Op.getOperand(2)); - const ConstantSDNode *En = cast(Op.getOperand(3)); SDValue Src0 = Op.getOperand(4); SDValue Src1 = Op.getOperand(5); - const ConstantSDNode *Done = cast(Op.getOperand(6)); - const ConstantSDNode *VM = cast(Op.getOperand(7)); + // Hack around illegal type on SI by directly selecting it. + if (isTypeLegal(Src0.getValueType())) + return SDValue(); + const ConstantSDNode *Done = cast(Op.getOperand(6)); SDValue Undef = DAG.getUNDEF(MVT::f32); const SDValue Ops[] = { - Chain, - DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt - DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en - DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0), - DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1), + Op.getOperand(2), // tgt + DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0), // src0 + DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1), // src1 Undef, // src2 Undef, // src3 + Op.getOperand(7), // vm DAG.getTargetConstant(1, DL, MVT::i1), // compr - DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1) + Op.getOperand(3), // en + Op.getOperand(0) // Chain }; - unsigned Opc = Done->isNullValue() ? - AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE; - return DAG.getNode(Opc, DL, Op->getVTList(), Ops); + unsigned Opc = Done->isNullValue() ? AMDGPU::EXP : AMDGPU::EXP_DONE; + return SDValue(DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops), 0); } case Intrinsic::amdgcn_s_barrier: { if (getTargetMachine().getOptLevel() > CodeGenOpt::None) { Index: llvm/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1093,7 +1093,7 @@ def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>; -def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> { +def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> { } @@ -1373,24 +1373,21 @@ // EXP classes //===----------------------------------------------------------------------===// -class EXP_Helper : EXPCommon< +class EXP_Helper : EXPCommon< (outs), (ins exp_tgt:$tgt, ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3, - exp_vm:$vm, exp_compr:$compr, i8imm:$en), - "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm", - [(node (i8 timm:$tgt), (i8 timm:$en), - f32:$src0, f32:$src1, f32:$src2, f32:$src3, - (i1 timm:$compr), (i1 timm:$vm))]> { + exp_vm:$vm, exp_compr:$compr, i32imm:$en), + "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm", []> { let AsmMatchConverter = "cvtExp"; } // Split EXP instruction into EXP and EXP_DONE so we can set // mayLoad for done=1. -multiclass EXP_m { +multiclass EXP_m { let mayLoad = done, DisableWQM = 1 in { let isPseudo = 1, isCodeGenOnly = 1 in { - def "" : EXP_Helper, + def "" : EXP_Helper, SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>; } Index: llvm/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstructions.td +++ llvm/lib/Target/AMDGPU/SIInstructions.td @@ -24,8 +24,41 @@ // EXP Instructions //===----------------------------------------------------------------------===// -defm EXP : EXP_m<0, AMDGPUexport>; -defm EXP_DONE : EXP_m<1, AMDGPUexport_done>; +defm EXP : EXP_m<0>; +defm EXP_DONE : EXP_m<1>; + +// FIXME: GlobalISel successfully imports this pattern, but fails to +// select because the i1 done_val does a type check on done_val, which +// only works on register operands. +class ExpPattern : GCNPat< + (int_amdgcn_exp timm:$tgt, timm:$en, + (vt ExpSrc0:$src0), (vt ExpSrc1:$src1), + (vt ExpSrc2:$src2), (vt ExpSrc3:$src3), + done_val, timm:$vm), + (Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1, + ExpSrc2:$src2, ExpSrc3:$src3, timm:$vm, 0, timm:$en) +>; + +class ExpComprPattern : GCNPat< + (int_amdgcn_exp_compr timm:$tgt, timm:$en, + (vt ExpSrc0:$src0), (vt ExpSrc1:$src1), + done_val, timm:$vm), + (Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1, + (IMPLICIT_DEF), (IMPLICIT_DEF), timm:$vm, 1, timm:$en) +>; + +// FIXME: The generated DAG matcher seems to have strange behavior +// with a 1-bit literal to match, so use a -1 for checking a true +// 1-bit value. +def : ExpPattern; +def : ExpPattern; +def : ExpPattern; +def : ExpPattern; + +def : ExpComprPattern; +def : ExpComprPattern; +def : ExpComprPattern; +def : ExpComprPattern; //===----------------------------------------------------------------------===// // VINTRP Instructions @@ -1781,15 +1814,6 @@ SRCMODS.NONE, $src2, $clamp, $omod) >; -// Allow integer inputs -class ExpPattern : GCNPat< - (node (i8 timm:$tgt), (i8 timm:$en), vt:$src0, vt:$src1, vt:$src2, vt:$src3, (i1 timm:$compr), (i1 timm:$vm)), - (Inst i8:$tgt, vt:$src0, vt:$src1, vt:$src2, vt:$src3, i1:$vm, i1:$compr, i8:$en) ->; - -def : ExpPattern; -def : ExpPattern; - // COPY is workaround tablegen bug from multiple outputs // from S_LSHL_B32's multiple outputs from implicit scc def. def : GCNPat <