Index: lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.h +++ lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -265,7 +265,9 @@ MAD_U24, MAD_I24, TEXTURE_FETCH, - EXPORT, + EXPORT, // exp on SI+ + EXPORT_DONE, // exp on SI+ with done bit set + R600_EXPORT, CONST_ADDRESS, REGISTER_LOAD, REGISTER_STORE, Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2702,6 +2702,8 @@ NODE_NAME_CASE(MAD_I24) NODE_NAME_CASE(TEXTURE_FETCH) NODE_NAME_CASE(EXPORT) + NODE_NAME_CASE(EXPORT_DONE) + NODE_NAME_CASE(R600_EXPORT) NODE_NAME_CASE(CONST_ADDRESS) NODE_NAME_CASE(REGISTER_LOAD) NODE_NAME_CASE(REGISTER_STORE) Index: lib/Target/AMDGPU/AMDGPUInstrInfo.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -225,6 +225,31 @@ SDTypeProfile<1, 4, [SDTCisFP<0>]>, [SDNPInGlue]>; +// SI+ export +def AMDGPUExportOp : SDTypeProfile<0, 8, [ + SDTCisInt<0>, // i32 en + SDTCisInt<1>, // i1 vm + // skip done + SDTCisSameAs<2, 0>, // i32 tgt + SDTCisSameAs<3, 1>, // i1 compr + SDTCisFP<4>, // f32 src0 + SDTCisSameAs<5, 4>, // f32 src1 + SDTCisSameAs<6, 4>, // f32 src2 + SDTCisSameAs<7, 4> // f32 src3 +]>; + +def AMDGPUexport: SDNode<"AMDGPUISD::EXPORT", AMDGPUExportOp, + [SDNPHasChain, SDNPMayStore]>; + +def AMDGPUexport_done: SDNode<"AMDGPUISD::EXPORT_DONE", AMDGPUExportOp, + [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>; + + +def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; + +def R600_EXPORT: SDNode<"AMDGPUISD::R600_EXPORT", R600ExportOp, + [SDNPHasChain, SDNPSideEffect]>; + //===----------------------------------------------------------------------===// // Flow Control Profile Types 
//===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.cpp +++ lib/Target/AMDGPU/R600ISelLowering.cpp @@ -623,7 +623,7 @@ DAG.getConstant(2, DL, MVT::i32), // SWZ_Z DAG.getConstant(3, DL, MVT::i32) // SWZ_W }; - return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args); + return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args); } // default for switch(IntrinsicID) @@ -1987,7 +1987,7 @@ return SDValue(); } - case AMDGPUISD::EXPORT: { + case AMDGPUISD::R600_EXPORT: { SDValue Arg = N->getOperand(1); if (Arg.getOpcode() != ISD::BUILD_VECTOR) break; @@ -2004,7 +2004,7 @@ }; SDLoc DL(N); NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL); - return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs); + return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs); } case AMDGPUISD::TEXTURE_FETCH: { SDValue Arg = N->getOperand(1); Index: lib/Target/AMDGPU/R600Instructions.td =================================================================== --- lib/Target/AMDGPU/R600Instructions.td +++ lib/Target/AMDGPU/R600Instructions.td @@ -424,11 +424,6 @@ // Export Instructions //===----------------------------------------------------------------------===// -def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; - -def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType, - [SDNPHasChain, SDNPSideEffect]>; - class ExportWord0 { field bits<32> Word0; @@ -496,7 +491,7 @@ (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0) >; - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), + def : Pat<(R600_EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)), (ExportInst R600_Reg128:$src, imm:$type, imm:$base, imm:$swz_x, imm:$swz_y, 
imm:$swz_z, imm:$swz_w, cf_inst, 0) Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1173,6 +1173,29 @@ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, Op->getVTList(), Ops, VT, MMO); } + case AMDGPUIntrinsic::SI_export: { + const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(2)); + const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(3)); + const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(4)); + const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(5)); + const ConstantSDNode *Compr = cast<ConstantSDNode>(Op.getOperand(6)); + + const SDValue Ops[] = { + Chain, + DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i32), + DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1), + DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i32), + DAG.getTargetConstant(Compr->getZExtValue(), DL, MVT::i1), + Op.getOperand(7), // src0 + Op.getOperand(8), // src1 + Op.getOperand(9), // src2 + Op.getOperand(10) // src3 + }; + + unsigned Opc = Done->isNullValue() ? + AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE; + return DAG.getNode(Opc, DL, Op->getVTList(), Ops); + } default: return SDValue(); } Index: lib/Target/AMDGPU/SIInstrFormats.td =================================================================== --- lib/Target/AMDGPU/SIInstrFormats.td +++ lib/Target/AMDGPU/SIInstrFormats.td @@ -605,6 +605,14 @@ let hasSideEffects = 0; } +class EXPCommon<dag outs, dag ins, string asm, list<dag> pattern> : + InstSI<outs, ins, asm, pattern> { + let EXP_CNT = 1; + let mayLoad = 0; + let mayStore = 1; + let hasSideEffects = ?; // Set if done bit in use. 
+} + } // End Uses = [EXEC] //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -614,27 +614,6 @@ // EXP classes //===----------------------------------------------------------------------===// -class EXPCommon : InstSI< - (outs), - (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm, - VGPR_32:$src0, VGPR_32:$src1, VGPR_32:$src2, VGPR_32:$src3), - "exp $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3", - [] > { - - let EXP_CNT = 1; - let Uses = [EXEC]; -} - -multiclass EXP_m { - - let isPseudo = 1, isCodeGenOnly = 1 in { - def "" : EXPCommon, SIMCInstr <"exp", SISubtarget.NONE> ; - } - - def _si : EXPCommon, SIMCInstr <"exp", SISubtarget.SI>, EXPe; - - def _vi : EXPCommon, SIMCInstr <"exp", SISubtarget.VI>, EXPe_vi; -} //===----------------------------------------------------------------------===// // Scalar classes @@ -1889,6 +1868,41 @@ } //===----------------------------------------------------------------------===// +// Export opcodes +//===----------------------------------------------------------------------===// + +class EXP_Helper<bit done, SDPatternOperator node = null_frag> : EXPCommon< + (outs), + (ins i32imm:$en, i32imm:$tgt, i1imm:$compr, i1imm:$vm, + VGPR_32:$src0, VGPR_32:$src1, VGPR_32:$src2, VGPR_32:$src3), + "exp $en, $tgt, $compr, "#!if(done, "1", "0")#", $vm, $src0, $src1, $src2, $src3", + [(node (i32 timm:$en), (i1 timm:$vm), (i32 timm:$tgt), (i1 timm:$compr), + f32:$src0, f32:$src1, f32:$src2, f32:$src3)] +>; + +// Split EXP instruction into EXP and EXP_DONE so we can set +// hasSideEffects for done=1. 
+multiclass EXP_m<bit done, SDPatternOperator node> { + let hasSideEffects = done in { + let isPseudo = 1, isCodeGenOnly = 1 in { + def "" : EXP_Helper<done, node>, + SIMCInstr <"exp"#!if(done, "_done", ""), SISubtarget.NONE> ; + } + + let done = done in { + def _si : EXP_Helper<done>, + SIMCInstr <"exp"#!if(done, "_done", ""), SISubtarget.SI>, + EXPe; + + def _vi : EXP_Helper<done>, + SIMCInstr <"exp"#!if(done, "_done", ""), SISubtarget.VI>, + EXPe_vi; + } + } +} + + +//===----------------------------------------------------------------------===// // Vector I/O classes //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -51,7 +51,8 @@ // EXP Instructions //===----------------------------------------------------------------------===// -defm EXP : EXP_m; +defm EXP : EXP_m<0, AMDGPUexport>; +defm EXP_DONE : EXP_m<1, AMDGPUexport_done>; //===----------------------------------------------------------------------===// // SMRD Instructions @@ -2062,14 +2063,6 @@ (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0) >; -/* int_SI_export */ -def : Pat < - (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr, - f32:$src0, f32:$src1, f32:$src2, f32:$src3), - (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm, - $src0, $src1, $src2, $src3) ->; - //===----------------------------------------------------------------------===// // SMRD Patterns //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/SIIntrinsics.td =================================================================== --- lib/Target/AMDGPU/SIIntrinsics.td +++ lib/Target/AMDGPU/SIIntrinsics.td @@ -16,7 +16,20 @@ def int_SI_tid : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>; def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], 
[IntrNoMem]>; - def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; + + def int_SI_export : Intrinsic <[], + [llvm_i32_ty, // en + llvm_i32_ty, // vm (FIXME: should be i1) + llvm_i32_ty, // done (FIXME: should be i1) + llvm_i32_ty, // tgt + llvm_i32_ty, // compr (FIXME: should be i1) + llvm_float_ty, // src0 + llvm_float_ty, // src1 + llvm_float_ty, // src2 + llvm_float_ty], // src3 + [] + >; + def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ; Index: test/CodeGen/AMDGPU/llvm.SI.load.dword.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.SI.load.dword.ll +++ test/CodeGen/AMDGPU/llvm.SI.load.dword.ll @@ -7,13 +7,13 @@ ; FIXME: Out of bounds immediate offset crashes ; CHECK-LABEL: {{^}}main: -; CHECK: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 glc slc -; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen glc slc -; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen glc slc -; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen offen glc slc -; CHECK: s_movk_i32 [[K:s[0-9]+]], 0x4d2 ; encoding -; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, [[K]] idxen offen offset:65535 glc slc - +; CHECK-DAG: s_movk_i32 [[K:s[0-9]+]], 0x4d2 ; encoding +; CHECK-DAG: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 glc slc +; CHECK-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen glc slc +; CHECK-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen glc slc +; CHECK-DAG: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 
{{s\[[0-9]+:[0-9]+\]}}, 0 idxen offen glc slc +; CHECK-DAG: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, [[K]] idxen offen offset:65535 glc slc +; CHECK: s_endpgm define void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <16 x i8>] addrspace(2)* byval %arg3, [17 x <16 x i8>] addrspace(2)* inreg %arg4, [17 x <16 x i8>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) #0 { main_body: %tmp = getelementptr [2 x <16 x i8>], [2 x <16 x i8>] addrspace(2)* %arg3, i64 0, i32 1