Index: lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.h +++ lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -271,7 +271,9 @@ MAD_U24, MAD_I24, TEXTURE_FETCH, - EXPORT, + EXPORT, // exp on SI+ + EXPORT_DONE, // exp on SI+ with done bit set + R600_EXPORT, CONST_ADDRESS, REGISTER_LOAD, REGISTER_STORE, Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2843,6 +2843,8 @@ NODE_NAME_CASE(MAD_I24) NODE_NAME_CASE(TEXTURE_FETCH) NODE_NAME_CASE(EXPORT) + NODE_NAME_CASE(EXPORT_DONE) + NODE_NAME_CASE(R600_EXPORT) NODE_NAME_CASE(CONST_ADDRESS) NODE_NAME_CASE(REGISTER_LOAD) NODE_NAME_CASE(REGISTER_STORE) Index: lib/Target/AMDGPU/AMDGPUInstrInfo.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -245,6 +245,31 @@ SDTypeProfile<1, 4, [SDTCisFP<0>]>, [SDNPInGlue]>; +// SI+ export +def AMDGPUExportOp : SDTypeProfile<0, 8, [ + SDTCisInt<0>, // i32 en + SDTCisInt<1>, // i1 vm + // skip done + SDTCisSameAs<2, 0>, // i32 tgt + SDTCisSameAs<3, 1>, // i1 compr + SDTCisFP<4>, // f32 src0 + SDTCisSameAs<5, 4>, // f32 src1 + SDTCisSameAs<6, 4>, // f32 src2 + SDTCisSameAs<7, 4> // f32 src3 +]>; + +def AMDGPUexport: SDNode<"AMDGPUISD::EXPORT", AMDGPUExportOp, + [SDNPHasChain, SDNPMayStore]>; + +def AMDGPUexport_done: SDNode<"AMDGPUISD::EXPORT_DONE", AMDGPUExportOp, + [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>; + + +def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; + +def R600_EXPORT: SDNode<"AMDGPUISD::R600_EXPORT", R600ExportOp, + [SDNPHasChain, SDNPSideEffect]>; + //===----------------------------------------------------------------------===// // Flow Control Profile Types //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.cpp +++ lib/Target/AMDGPU/R600ISelLowering.cpp @@ -653,7 +653,7 @@ DAG.getConstant(2, DL, MVT::i32), // SWZ_Z DAG.getConstant(3, DL, MVT::i32) // SWZ_W }; - return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args); + return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args); } // default for switch(IntrinsicID) @@ -2112,7 +2112,7 @@ return SDValue(); } - case AMDGPUISD::EXPORT: { + case AMDGPUISD::R600_EXPORT: { SDValue Arg = N->getOperand(1); if (Arg.getOpcode() != ISD::BUILD_VECTOR) break; @@ -2129,7 +2129,7 @@ }; SDLoc DL(N); NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL); - return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs); + return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs); } case AMDGPUISD::TEXTURE_FETCH: { SDValue Arg = N->getOperand(1); Index: lib/Target/AMDGPU/R600Instructions.td =================================================================== --- lib/Target/AMDGPU/R600Instructions.td +++ lib/Target/AMDGPU/R600Instructions.td @@ -450,11 +450,6 @@ // Export Instructions //===----------------------------------------------------------------------===// -def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; - -def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType, - [SDNPHasChain, SDNPSideEffect]>; - class ExportWord0 { field bits<32> Word0; @@ -500,7 +495,7 @@ } multiclass ExportPattern cf_inst> { - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), + def : Pat<(R600_EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)), (ExportInst R600_Reg128:$src, imm:$type, imm:$base, imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0) Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1980,6 +1980,7 @@ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, Op->getVTList(), Ops, VT, MMO); } + case AMDGPUIntrinsic::AMDGPU_kill: { if (const ConstantFPSDNode *K = dyn_cast(Op.getOperand(2))) { if (!K->isNegative()) @@ -1988,6 +1989,29 @@ return Op; } + case AMDGPUIntrinsic::SI_export: { + const ConstantSDNode *En = cast(Op.getOperand(2)); + const ConstantSDNode *VM = cast(Op.getOperand(3)); + const ConstantSDNode *Done = cast(Op.getOperand(4)); + const ConstantSDNode *Tgt = cast(Op.getOperand(5)); + const ConstantSDNode *Compr = cast(Op.getOperand(6)); + + const SDValue Ops[] = { + Chain, + DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i32), + DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i32), // TODO: i1 + DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i32), + DAG.getTargetConstant(Compr->getZExtValue(), DL, MVT::i32), // TODO: i1 + Op.getOperand(7), // src0 + Op.getOperand(8), // src1 + Op.getOperand(9), // src2 + Op.getOperand(10) // src3 + }; + + unsigned Opc = Done->isNullValue() ? + AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE; + return DAG.getNode(Opc, DL, Op->getVTList(), Ops); + } default: return SDValue(); } Index: lib/Target/AMDGPU/SIInsertWaits.cpp =================================================================== --- lib/Target/AMDGPU/SIInsertWaits.cpp +++ lib/Target/AMDGPU/SIInsertWaits.cpp @@ -188,8 +188,7 @@ Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT); // Only consider stores or EXP for EXP_CNT - Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT && - (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore())); + Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT) && MI.mayStore(); // LGKM may uses larger values if (TSFlags & SIInstrFlags::LGKM_CNT) { Index: lib/Target/AMDGPU/SIInstrFormats.td =================================================================== --- lib/Target/AMDGPU/SIInstrFormats.td +++ lib/Target/AMDGPU/SIInstrFormats.td @@ -660,6 +660,16 @@ let hasSideEffects = 0; } +class EXPCommon pattern> : + InstSI { + let EXP_CNT = 1; + let mayLoad = 0; // Set to 1 if done bit is set. + let mayStore = 1; + let UseNamedOperandTable = 1; + let Uses = [EXEC]; + let SchedRW = [WriteExport]; +} + } // End Uses = [EXEC] //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -678,39 +678,6 @@ } //===----------------------------------------------------------------------===// -// EXP classes -//===----------------------------------------------------------------------===// - -class EXPCommon : InstSI< - (outs), - (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm, - VGPR_32:$src0, VGPR_32:$src1, VGPR_32:$src2, VGPR_32:$src3), - "exp $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3", - [] > { - - let EXP_CNT = 1; - let Uses = [EXEC]; - let SchedRW = [WriteExport]; -} - -multiclass EXP_m { - - let isPseudo = 1, isCodeGenOnly = 1 in { - def "" : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.NONE> ; - } - - def _si : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.SI>, EXPe { - let DecoderNamespace="SICI"; - let DisableDecoder = DisableSIDecoder; - } - - def _vi : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.VI>, EXPe_vi { - let DecoderNamespace="VI"; - let DisableDecoder = DisableVIDecoder; - } -} - -//===----------------------------------------------------------------------===// // Scalar classes //===----------------------------------------------------------------------===// @@ -2551,6 +2518,46 @@ } //===----------------------------------------------------------------------===// +// Export opcodes +//===----------------------------------------------------------------------===// + +class EXP_Helper : EXPCommon< + (outs), + (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$vm, + VGPR_32:$src0, VGPR_32:$src1, VGPR_32:$src2, VGPR_32:$src3), + "exp $en, $tgt, $compr, "#!if(done, "1", "0")#", $vm, $src0, $src1, $src2, $src3", + [(node (i32 timm:$en), (i32 timm:$vm), (i32 timm:$tgt), (i32 timm:$compr), + f32:$src0, f32:$src1, f32:$src2, f32:$src3)] +>; + +// Split EXP instruction into EXP and EXP_DONE so we can set +// hasSideEffects for done=1. +multiclass EXP_m { + let mayLoad = done in { + let isPseudo = 1, isCodeGenOnly = 1 in { + def "" : EXP_Helper, + SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>; + } + + let done = done in { + def _si : EXP_Helper, + SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.SI>, + EXPe { + let DecoderNamespace = "SICI"; + let DisableDecoder = DisableSIDecoder; + } + + def _vi : EXP_Helper, + SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.VI>, + EXPe_vi { + let DecoderNamespace = "VI"; + let DisableDecoder = DisableVIDecoder; + } + } + } +} + +//===----------------------------------------------------------------------===// // Vector I/O classes //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -35,7 +35,8 @@ // EXP Instructions //===----------------------------------------------------------------------===// -defm EXP : EXP_m; +defm EXP : EXP_m<0, AMDGPUexport>; +defm EXP_DONE : EXP_m<1, AMDGPUexport_done>; //===----------------------------------------------------------------------===// // SMRD Instructions @@ -2139,13 +2140,6 @@ (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0) >; -def : Pat < - (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr, - f32:$src0, f32:$src1, f32:$src2, f32:$src3), - (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm, - $src0, $src1, $src2, $src3) ->; - //===----------------------------------------------------------------------===// // buffer_load/store_format patterns //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/SIIntrinsics.td =================================================================== --- lib/Target/AMDGPU/SIIntrinsics.td +++ lib/Target/AMDGPU/SIIntrinsics.td @@ -14,7 +14,20 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; + + def int_SI_export : Intrinsic <[], + [llvm_i32_ty, // en + llvm_i32_ty, // vm (FIXME: should be i1) + llvm_i32_ty, // done (FIXME: should be i1) + llvm_i32_ty, // tgt + llvm_i32_ty, // compr (FIXME: should be i1) + llvm_float_ty, // src0 + llvm_float_ty, // src1 + llvm_float_ty, // src2 + llvm_float_ty], // src3 + [] + >; + def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ; Index: lib/Target/AMDGPU/SILowerControlFlow.cpp =================================================================== --- lib/Target/AMDGPU/SILowerControlFlow.cpp +++ lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -228,12 +228,11 @@ MachineBasicBlock::iterator Insert = SkipBB->begin(); // Exec mask is zero: Export to NULL target... - BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP)) + BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP_DONE)) .addImm(0) .addImm(0x09) // V_008DFC_SQ_EXP_NULL .addImm(0) .addImm(1) - .addImm(1) .addReg(AMDGPU::VGPR0, RegState::Undef) .addReg(AMDGPU::VGPR0, RegState::Undef) .addReg(AMDGPU::VGPR0, RegState::Undef) Index: lib/Target/AMDGPU/SIWholeQuadMode.cpp =================================================================== --- lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -165,8 +165,8 @@ Flags = StateExact; } else { // Handle export instructions with the exec mask valid flag set - if (Opcode == AMDGPU::EXP) { - if (MI.getOperand(4).getImm() != 0) + if (Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE) { + if (TII->hasModifiersSet(MI, AMDGPU::OpName::vm)) ExecExports.push_back(&MI); } else if (Opcode == AMDGPU::SI_PS_LIVE) { LiveMaskQueries.push_back(&MI);