diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -2348,508 +2348,508 @@ switch (N->getOpcode()) { default: return false; case NVPTXISD::Tex1DFloatS32: - Opc = NVPTX::TEX_1D_F32_S32; + Opc = NVPTX::TEX_1D_F32_S32_RR; break; case NVPTXISD::Tex1DFloatFloat: - Opc = NVPTX::TEX_1D_F32_F32; + Opc = NVPTX::TEX_1D_F32_F32_RR; break; case NVPTXISD::Tex1DFloatFloatLevel: - Opc = NVPTX::TEX_1D_F32_F32_LEVEL; + Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR; break; case NVPTXISD::Tex1DFloatFloatGrad: - Opc = NVPTX::TEX_1D_F32_F32_GRAD; + Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR; break; case NVPTXISD::Tex1DS32S32: - Opc = NVPTX::TEX_1D_S32_S32; + Opc = NVPTX::TEX_1D_S32_S32_RR; break; case NVPTXISD::Tex1DS32Float: - Opc = NVPTX::TEX_1D_S32_F32; + Opc = NVPTX::TEX_1D_S32_F32_RR; break; case NVPTXISD::Tex1DS32FloatLevel: - Opc = NVPTX::TEX_1D_S32_F32_LEVEL; + Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR; break; case NVPTXISD::Tex1DS32FloatGrad: - Opc = NVPTX::TEX_1D_S32_F32_GRAD; + Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR; break; case NVPTXISD::Tex1DU32S32: - Opc = NVPTX::TEX_1D_U32_S32; + Opc = NVPTX::TEX_1D_U32_S32_RR; break; case NVPTXISD::Tex1DU32Float: - Opc = NVPTX::TEX_1D_U32_F32; + Opc = NVPTX::TEX_1D_U32_F32_RR; break; case NVPTXISD::Tex1DU32FloatLevel: - Opc = NVPTX::TEX_1D_U32_F32_LEVEL; + Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR; break; case NVPTXISD::Tex1DU32FloatGrad: - Opc = NVPTX::TEX_1D_U32_F32_GRAD; + Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR; break; case NVPTXISD::Tex1DArrayFloatS32: - Opc = NVPTX::TEX_1D_ARRAY_F32_S32; + Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR; break; case NVPTXISD::Tex1DArrayFloatFloat: - Opc = NVPTX::TEX_1D_ARRAY_F32_F32; + Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR; break; case NVPTXISD::Tex1DArrayFloatFloatLevel: - Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL; + Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR; break; case NVPTXISD::Tex1DArrayFloatFloatGrad: - Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD; + Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR; break; case NVPTXISD::Tex1DArrayS32S32: - Opc = NVPTX::TEX_1D_ARRAY_S32_S32; + Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR; break; case NVPTXISD::Tex1DArrayS32Float: - Opc = NVPTX::TEX_1D_ARRAY_S32_F32; + Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR; break; case NVPTXISD::Tex1DArrayS32FloatLevel: - Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL; + Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR; break; case NVPTXISD::Tex1DArrayS32FloatGrad: - Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD; + Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR; break; case NVPTXISD::Tex1DArrayU32S32: - Opc = NVPTX::TEX_1D_ARRAY_U32_S32; + Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR; break; case NVPTXISD::Tex1DArrayU32Float: - Opc = NVPTX::TEX_1D_ARRAY_U32_F32; + Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR; break; case NVPTXISD::Tex1DArrayU32FloatLevel: - Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL; + Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR; break; case NVPTXISD::Tex1DArrayU32FloatGrad: - Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD; + Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR; break; case NVPTXISD::Tex2DFloatS32: - Opc = NVPTX::TEX_2D_F32_S32; + Opc = NVPTX::TEX_2D_F32_S32_RR; break; case NVPTXISD::Tex2DFloatFloat: - Opc = NVPTX::TEX_2D_F32_F32; + Opc = NVPTX::TEX_2D_F32_F32_RR; break; case NVPTXISD::Tex2DFloatFloatLevel: - Opc = NVPTX::TEX_2D_F32_F32_LEVEL; + Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR; break; case NVPTXISD::Tex2DFloatFloatGrad: - Opc = NVPTX::TEX_2D_F32_F32_GRAD; + Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR; break; case NVPTXISD::Tex2DS32S32: - Opc = NVPTX::TEX_2D_S32_S32; + Opc = NVPTX::TEX_2D_S32_S32_RR; break; case NVPTXISD::Tex2DS32Float: - Opc = NVPTX::TEX_2D_S32_F32; + Opc = NVPTX::TEX_2D_S32_F32_RR; break; case NVPTXISD::Tex2DS32FloatLevel: - Opc = NVPTX::TEX_2D_S32_F32_LEVEL; + Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR; break; case NVPTXISD::Tex2DS32FloatGrad: - Opc = NVPTX::TEX_2D_S32_F32_GRAD; + Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR; break; case NVPTXISD::Tex2DU32S32: - Opc = NVPTX::TEX_2D_U32_S32; + Opc = NVPTX::TEX_2D_U32_S32_RR; break; case NVPTXISD::Tex2DU32Float: - Opc = NVPTX::TEX_2D_U32_F32; + Opc = NVPTX::TEX_2D_U32_F32_RR; break; case NVPTXISD::Tex2DU32FloatLevel: - Opc = NVPTX::TEX_2D_U32_F32_LEVEL; + Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR; break; case NVPTXISD::Tex2DU32FloatGrad: - Opc = NVPTX::TEX_2D_U32_F32_GRAD; + Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR; break; case NVPTXISD::Tex2DArrayFloatS32: - Opc = NVPTX::TEX_2D_ARRAY_F32_S32; + Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR; break; case NVPTXISD::Tex2DArrayFloatFloat: - Opc = NVPTX::TEX_2D_ARRAY_F32_F32; + Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR; break; case NVPTXISD::Tex2DArrayFloatFloatLevel: - Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL; + Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR; break; case NVPTXISD::Tex2DArrayFloatFloatGrad: - Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD; + Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR; break; case NVPTXISD::Tex2DArrayS32S32: - Opc = NVPTX::TEX_2D_ARRAY_S32_S32; + Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR; break; case NVPTXISD::Tex2DArrayS32Float: - Opc = NVPTX::TEX_2D_ARRAY_S32_F32; + Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR; break; case NVPTXISD::Tex2DArrayS32FloatLevel: - Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL; + Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR; break; case NVPTXISD::Tex2DArrayS32FloatGrad: - Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD; + Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR; break; case NVPTXISD::Tex2DArrayU32S32: - Opc = NVPTX::TEX_2D_ARRAY_U32_S32; + Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR; break; case NVPTXISD::Tex2DArrayU32Float: - Opc = NVPTX::TEX_2D_ARRAY_U32_F32; + Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR; break; case NVPTXISD::Tex2DArrayU32FloatLevel: - Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL; + Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR; break; case NVPTXISD::Tex2DArrayU32FloatGrad: - Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD; + Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR; break; case NVPTXISD::Tex3DFloatS32: - Opc = NVPTX::TEX_3D_F32_S32; + Opc = NVPTX::TEX_3D_F32_S32_RR; break; case NVPTXISD::Tex3DFloatFloat: - Opc = NVPTX::TEX_3D_F32_F32; + Opc = NVPTX::TEX_3D_F32_F32_RR; break; case NVPTXISD::Tex3DFloatFloatLevel: - Opc = NVPTX::TEX_3D_F32_F32_LEVEL; + Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR; break; case NVPTXISD::Tex3DFloatFloatGrad: - Opc = NVPTX::TEX_3D_F32_F32_GRAD; + Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR; break; case NVPTXISD::Tex3DS32S32: - Opc = NVPTX::TEX_3D_S32_S32; + Opc = NVPTX::TEX_3D_S32_S32_RR; break; case NVPTXISD::Tex3DS32Float: - Opc = NVPTX::TEX_3D_S32_F32; + Opc = NVPTX::TEX_3D_S32_F32_RR; break; case NVPTXISD::Tex3DS32FloatLevel: - Opc = NVPTX::TEX_3D_S32_F32_LEVEL; + Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR; break; case NVPTXISD::Tex3DS32FloatGrad: - Opc = NVPTX::TEX_3D_S32_F32_GRAD; + Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR; break; case NVPTXISD::Tex3DU32S32: - Opc = NVPTX::TEX_3D_U32_S32; + Opc = NVPTX::TEX_3D_U32_S32_RR; break; case NVPTXISD::Tex3DU32Float: - Opc = NVPTX::TEX_3D_U32_F32; + Opc = NVPTX::TEX_3D_U32_F32_RR; break; case NVPTXISD::Tex3DU32FloatLevel: - Opc = NVPTX::TEX_3D_U32_F32_LEVEL; + Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR; break; case NVPTXISD::Tex3DU32FloatGrad: - Opc = NVPTX::TEX_3D_U32_F32_GRAD; + Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR; break; case NVPTXISD::TexCubeFloatFloat: - Opc = NVPTX::TEX_CUBE_F32_F32; + Opc = NVPTX::TEX_CUBE_F32_F32_RR; break; case NVPTXISD::TexCubeFloatFloatLevel: - Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL; + Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR; break; case NVPTXISD::TexCubeS32Float: - Opc = NVPTX::TEX_CUBE_S32_F32; + Opc = NVPTX::TEX_CUBE_S32_F32_RR; break; case NVPTXISD::TexCubeS32FloatLevel: - Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL; + Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR; break; case NVPTXISD::TexCubeU32Float: - Opc = NVPTX::TEX_CUBE_U32_F32; + Opc = NVPTX::TEX_CUBE_U32_F32_RR; break; case NVPTXISD::TexCubeU32FloatLevel: - Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL; + Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR; break; case NVPTXISD::TexCubeArrayFloatFloat: - Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32; + Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR; break; case NVPTXISD::TexCubeArrayFloatFloatLevel: - Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL; + Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR; break; case NVPTXISD::TexCubeArrayS32Float: - Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32; + Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR; break; case NVPTXISD::TexCubeArrayS32FloatLevel: - Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL; + Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR; break; case NVPTXISD::TexCubeArrayU32Float: - Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32; + Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR; break; case NVPTXISD::TexCubeArrayU32FloatLevel: - Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL; + Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR; break; case NVPTXISD::Tld4R2DFloatFloat: - Opc = NVPTX::TLD4_R_2D_F32_F32; + Opc = NVPTX::TLD4_R_2D_F32_F32_RR; break; case NVPTXISD::Tld4G2DFloatFloat: - Opc = NVPTX::TLD4_G_2D_F32_F32; + Opc = NVPTX::TLD4_G_2D_F32_F32_RR; break; case NVPTXISD::Tld4B2DFloatFloat: - Opc = NVPTX::TLD4_B_2D_F32_F32; + Opc = NVPTX::TLD4_B_2D_F32_F32_RR; break; case NVPTXISD::Tld4A2DFloatFloat: - Opc = NVPTX::TLD4_A_2D_F32_F32; + Opc = NVPTX::TLD4_A_2D_F32_F32_RR; break; case NVPTXISD::Tld4R2DS64Float: - Opc = NVPTX::TLD4_R_2D_S32_F32; + Opc = NVPTX::TLD4_R_2D_S32_F32_RR; break; case NVPTXISD::Tld4G2DS64Float: - Opc = NVPTX::TLD4_G_2D_S32_F32; + Opc = NVPTX::TLD4_G_2D_S32_F32_RR; break; case NVPTXISD::Tld4B2DS64Float: - Opc = NVPTX::TLD4_B_2D_S32_F32; + Opc = NVPTX::TLD4_B_2D_S32_F32_RR; break; case NVPTXISD::Tld4A2DS64Float: - Opc = NVPTX::TLD4_A_2D_S32_F32; + Opc = NVPTX::TLD4_A_2D_S32_F32_RR; break; case NVPTXISD::Tld4R2DU64Float: - Opc = NVPTX::TLD4_R_2D_U32_F32; + Opc = NVPTX::TLD4_R_2D_U32_F32_RR; break; case NVPTXISD::Tld4G2DU64Float: - Opc = NVPTX::TLD4_G_2D_U32_F32; + Opc = NVPTX::TLD4_G_2D_U32_F32_RR; break; case NVPTXISD::Tld4B2DU64Float: - Opc = NVPTX::TLD4_B_2D_U32_F32; + Opc = NVPTX::TLD4_B_2D_U32_F32_RR; break; case NVPTXISD::Tld4A2DU64Float: - Opc = NVPTX::TLD4_A_2D_U32_F32; + Opc = NVPTX::TLD4_A_2D_U32_F32_RR; break; case NVPTXISD::TexUnified1DFloatS32: - Opc = NVPTX::TEX_UNIFIED_1D_F32_S32; + Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R; break; case NVPTXISD::TexUnified1DFloatFloat: - Opc = NVPTX::TEX_UNIFIED_1D_F32_F32; + Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R; break; case NVPTXISD::TexUnified1DFloatFloatLevel: - Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R; break; case NVPTXISD::TexUnified1DFloatFloatGrad: - Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD; + Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R; break; case NVPTXISD::TexUnified1DS32S32: - Opc = NVPTX::TEX_UNIFIED_1D_S32_S32; + Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R; break; case NVPTXISD::TexUnified1DS32Float: - Opc = NVPTX::TEX_UNIFIED_1D_S32_F32; + Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R; break; case NVPTXISD::TexUnified1DS32FloatLevel: - Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R; break; case NVPTXISD::TexUnified1DS32FloatGrad: - Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD; + Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R; break; case NVPTXISD::TexUnified1DU32S32: - Opc = NVPTX::TEX_UNIFIED_1D_U32_S32; + Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R; break; case NVPTXISD::TexUnified1DU32Float: - Opc = NVPTX::TEX_UNIFIED_1D_U32_F32; + Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R; break; case NVPTXISD::TexUnified1DU32FloatLevel: - Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R; break; case NVPTXISD::TexUnified1DU32FloatGrad: - Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD; + Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R; break; case NVPTXISD::TexUnified1DArrayFloatS32: - Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32; + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R; break; case NVPTXISD::TexUnified1DArrayFloatFloat: - Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32; + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R; break; case NVPTXISD::TexUnified1DArrayFloatFloatLevel: - Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R; break; case NVPTXISD::TexUnified1DArrayFloatFloatGrad: - Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD; + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R; break; case NVPTXISD::TexUnified1DArrayS32S32: - Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32; + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R; break; case NVPTXISD::TexUnified1DArrayS32Float: - Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32; + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R; break; case NVPTXISD::TexUnified1DArrayS32FloatLevel: - Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R; break; case NVPTXISD::TexUnified1DArrayS32FloatGrad: - Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD; + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R; break; case NVPTXISD::TexUnified1DArrayU32S32: - Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32; + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R; break; case NVPTXISD::TexUnified1DArrayU32Float: - Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32; + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R; break; case NVPTXISD::TexUnified1DArrayU32FloatLevel: - Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R; break; case NVPTXISD::TexUnified1DArrayU32FloatGrad: - Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD; + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R; break; case NVPTXISD::TexUnified2DFloatS32: - Opc = NVPTX::TEX_UNIFIED_2D_F32_S32; + Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R; break; case NVPTXISD::TexUnified2DFloatFloat: - Opc = NVPTX::TEX_UNIFIED_2D_F32_F32; + Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R; break; case NVPTXISD::TexUnified2DFloatFloatLevel: - Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R; break; case NVPTXISD::TexUnified2DFloatFloatGrad: - Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD; + Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R; break; case NVPTXISD::TexUnified2DS32S32: - Opc = NVPTX::TEX_UNIFIED_2D_S32_S32; + Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R; break; case NVPTXISD::TexUnified2DS32Float: - Opc = NVPTX::TEX_UNIFIED_2D_S32_F32; + Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R; break; case NVPTXISD::TexUnified2DS32FloatLevel: - Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R; break; case NVPTXISD::TexUnified2DS32FloatGrad: - Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD; + Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R; break; case NVPTXISD::TexUnified2DU32S32: - Opc = NVPTX::TEX_UNIFIED_2D_U32_S32; + Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R; break; case NVPTXISD::TexUnified2DU32Float: - Opc = NVPTX::TEX_UNIFIED_2D_U32_F32; + Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R; break; case NVPTXISD::TexUnified2DU32FloatLevel: - Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R; break; case NVPTXISD::TexUnified2DU32FloatGrad: - Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD; + Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R; break; case NVPTXISD::TexUnified2DArrayFloatS32: - Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32; + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R; break; case NVPTXISD::TexUnified2DArrayFloatFloat: - Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32; + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R; break; case NVPTXISD::TexUnified2DArrayFloatFloatLevel: - Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R; break; case NVPTXISD::TexUnified2DArrayFloatFloatGrad: - Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD; + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R; break; case NVPTXISD::TexUnified2DArrayS32S32: - Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32; + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R; break; case NVPTXISD::TexUnified2DArrayS32Float: - Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32; + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R; break; case NVPTXISD::TexUnified2DArrayS32FloatLevel: - Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R; break; case NVPTXISD::TexUnified2DArrayS32FloatGrad: - Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD; + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R; break; case NVPTXISD::TexUnified2DArrayU32S32: - Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32; + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R; break; case NVPTXISD::TexUnified2DArrayU32Float: - Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32; + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R; break; case NVPTXISD::TexUnified2DArrayU32FloatLevel: - Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R; break; case NVPTXISD::TexUnified2DArrayU32FloatGrad: - Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD; + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R; break; case NVPTXISD::TexUnified3DFloatS32: - Opc = NVPTX::TEX_UNIFIED_3D_F32_S32; + Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R; break; case NVPTXISD::TexUnified3DFloatFloat: - Opc = NVPTX::TEX_UNIFIED_3D_F32_F32; + Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R; break; case NVPTXISD::TexUnified3DFloatFloatLevel: - Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R; break; case NVPTXISD::TexUnified3DFloatFloatGrad: - Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD; + Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R; break; case NVPTXISD::TexUnified3DS32S32: - Opc = NVPTX::TEX_UNIFIED_3D_S32_S32; + Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R; break; case NVPTXISD::TexUnified3DS32Float: - Opc = NVPTX::TEX_UNIFIED_3D_S32_F32; + Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R; break; case NVPTXISD::TexUnified3DS32FloatLevel: - Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R; break; case NVPTXISD::TexUnified3DS32FloatGrad: - Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD; + Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R; break; case NVPTXISD::TexUnified3DU32S32: - Opc = NVPTX::TEX_UNIFIED_3D_U32_S32; + Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R; break; case NVPTXISD::TexUnified3DU32Float: - Opc = NVPTX::TEX_UNIFIED_3D_U32_F32; + Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R; break; case NVPTXISD::TexUnified3DU32FloatLevel: - Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R; break; case NVPTXISD::TexUnified3DU32FloatGrad: - Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD; + Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R; break; case NVPTXISD::TexUnifiedCubeFloatFloat: - Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32; + Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R; break; case NVPTXISD::TexUnifiedCubeFloatFloatLevel: - Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R; break; case NVPTXISD::TexUnifiedCubeS32Float: - Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32; + Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R; break; case NVPTXISD::TexUnifiedCubeS32FloatLevel: - Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R; break; case NVPTXISD::TexUnifiedCubeU32Float: - Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32; + Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R; break; case NVPTXISD::TexUnifiedCubeU32FloatLevel: - Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R; break; case NVPTXISD::TexUnifiedCubeArrayFloatFloat: - Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32; + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R; break; case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: - Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R; break; case NVPTXISD::TexUnifiedCubeArrayS32Float: - Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32; + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R; break; case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: - Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R; break; case NVPTXISD::TexUnifiedCubeArrayU32Float: - Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32; + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R; break; case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: - Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL; + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R; break; case NVPTXISD::Tld4UnifiedR2DFloatFloat: - Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32; + Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R; break; case NVPTXISD::Tld4UnifiedG2DFloatFloat: - Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32; + Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R; break; case NVPTXISD::Tld4UnifiedB2DFloatFloat: - Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32; + Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R; break; case NVPTXISD::Tld4UnifiedA2DFloatFloat: - Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32; + Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R; break; case NVPTXISD::Tld4UnifiedR2DS64Float: - Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32; + Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R; break; case NVPTXISD::Tld4UnifiedG2DS64Float: - Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32; + Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R; break; case NVPTXISD::Tld4UnifiedB2DS64Float: - Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32; + Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R; break; case NVPTXISD::Tld4UnifiedA2DS64Float: - Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32; + Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R; break; case NVPTXISD::Tld4UnifiedR2DU64Float: - Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32; + Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R; break; case NVPTXISD::Tld4UnifiedG2DU64Float: - Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32; + Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R; break; case NVPTXISD::Tld4UnifiedB2DU64Float: - Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32; + Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R; break; case NVPTXISD::Tld4UnifiedA2DU64Float: - Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32; + Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R; break; } @@ -2866,499 +2866,499 @@ switch (N->getOpcode()) { default: return false; case NVPTXISD::Suld1DI8Clamp: - Opc = NVPTX::SULD_1D_I8_CLAMP; + Opc = NVPTX::SULD_1D_I8_CLAMP_R; break; case NVPTXISD::Suld1DI16Clamp: - Opc = NVPTX::SULD_1D_I16_CLAMP; + Opc = NVPTX::SULD_1D_I16_CLAMP_R; break; case NVPTXISD::Suld1DI32Clamp: - Opc = NVPTX::SULD_1D_I32_CLAMP; + Opc = NVPTX::SULD_1D_I32_CLAMP_R; break; case NVPTXISD::Suld1DI64Clamp: - Opc = NVPTX::SULD_1D_I64_CLAMP; + Opc = NVPTX::SULD_1D_I64_CLAMP_R; break; case NVPTXISD::Suld1DV2I8Clamp: - Opc = NVPTX::SULD_1D_V2I8_CLAMP; + Opc = NVPTX::SULD_1D_V2I8_CLAMP_R; break; case NVPTXISD::Suld1DV2I16Clamp: - Opc = NVPTX::SULD_1D_V2I16_CLAMP; + Opc = NVPTX::SULD_1D_V2I16_CLAMP_R; break; case NVPTXISD::Suld1DV2I32Clamp: - Opc = NVPTX::SULD_1D_V2I32_CLAMP; + Opc = NVPTX::SULD_1D_V2I32_CLAMP_R; break; case NVPTXISD::Suld1DV2I64Clamp: - Opc = NVPTX::SULD_1D_V2I64_CLAMP; + Opc = NVPTX::SULD_1D_V2I64_CLAMP_R; break; case NVPTXISD::Suld1DV4I8Clamp: - Opc = NVPTX::SULD_1D_V4I8_CLAMP; + Opc = NVPTX::SULD_1D_V4I8_CLAMP_R; break; case NVPTXISD::Suld1DV4I16Clamp: - Opc = NVPTX::SULD_1D_V4I16_CLAMP; + Opc = NVPTX::SULD_1D_V4I16_CLAMP_R; break; case NVPTXISD::Suld1DV4I32Clamp: - Opc = NVPTX::SULD_1D_V4I32_CLAMP; + Opc = NVPTX::SULD_1D_V4I32_CLAMP_R; break; case NVPTXISD::Suld1DArrayI8Clamp: - Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP; + Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R; break; case NVPTXISD::Suld1DArrayI16Clamp: - Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP; + Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R; break; case NVPTXISD::Suld1DArrayI32Clamp: - Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP; + Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R; break; case NVPTXISD::Suld1DArrayI64Clamp: - Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP; + Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R; break; case NVPTXISD::Suld1DArrayV2I8Clamp: - Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP; + Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R; break; case NVPTXISD::Suld1DArrayV2I16Clamp: - Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP; + Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R; break; case NVPTXISD::Suld1DArrayV2I32Clamp: - Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP; + Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R; break; case NVPTXISD::Suld1DArrayV2I64Clamp: - Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP; + Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R; break; case NVPTXISD::Suld1DArrayV4I8Clamp: - Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP; + Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R; break; case NVPTXISD::Suld1DArrayV4I16Clamp: - Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP; + Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R; break; case NVPTXISD::Suld1DArrayV4I32Clamp: - Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP; + Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R; break; case NVPTXISD::Suld2DI8Clamp: - Opc = NVPTX::SULD_2D_I8_CLAMP; + Opc = NVPTX::SULD_2D_I8_CLAMP_R; break; case NVPTXISD::Suld2DI16Clamp: - Opc = NVPTX::SULD_2D_I16_CLAMP; + Opc = NVPTX::SULD_2D_I16_CLAMP_R; break; case NVPTXISD::Suld2DI32Clamp: - Opc = NVPTX::SULD_2D_I32_CLAMP; + Opc = NVPTX::SULD_2D_I32_CLAMP_R; break; case NVPTXISD::Suld2DI64Clamp: - Opc = NVPTX::SULD_2D_I64_CLAMP; + Opc = NVPTX::SULD_2D_I64_CLAMP_R; break; case NVPTXISD::Suld2DV2I8Clamp: - Opc = NVPTX::SULD_2D_V2I8_CLAMP; + Opc = NVPTX::SULD_2D_V2I8_CLAMP_R; break; case NVPTXISD::Suld2DV2I16Clamp: - Opc = NVPTX::SULD_2D_V2I16_CLAMP; + Opc = NVPTX::SULD_2D_V2I16_CLAMP_R; break; case NVPTXISD::Suld2DV2I32Clamp: - Opc = NVPTX::SULD_2D_V2I32_CLAMP; + Opc = NVPTX::SULD_2D_V2I32_CLAMP_R; break; case NVPTXISD::Suld2DV2I64Clamp: - Opc = NVPTX::SULD_2D_V2I64_CLAMP; + Opc = NVPTX::SULD_2D_V2I64_CLAMP_R; break; case NVPTXISD::Suld2DV4I8Clamp: - Opc = NVPTX::SULD_2D_V4I8_CLAMP; + Opc = NVPTX::SULD_2D_V4I8_CLAMP_R; break; case NVPTXISD::Suld2DV4I16Clamp: - Opc = NVPTX::SULD_2D_V4I16_CLAMP; + Opc = NVPTX::SULD_2D_V4I16_CLAMP_R; break; case NVPTXISD::Suld2DV4I32Clamp: - Opc = NVPTX::SULD_2D_V4I32_CLAMP; + Opc = NVPTX::SULD_2D_V4I32_CLAMP_R; break; case NVPTXISD::Suld2DArrayI8Clamp: - Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP; + Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R; break; case NVPTXISD::Suld2DArrayI16Clamp: - Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP; + Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R; break; case NVPTXISD::Suld2DArrayI32Clamp: - Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP; + Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R; break; case NVPTXISD::Suld2DArrayI64Clamp: - Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP; + Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R; break; case NVPTXISD::Suld2DArrayV2I8Clamp: - Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP; + Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R; break; case NVPTXISD::Suld2DArrayV2I16Clamp: - Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP; + Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R; break; case NVPTXISD::Suld2DArrayV2I32Clamp: - Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP; + Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R; break; case NVPTXISD::Suld2DArrayV2I64Clamp: - Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP; + Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R; break; case NVPTXISD::Suld2DArrayV4I8Clamp: - Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP; + Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R; break; case NVPTXISD::Suld2DArrayV4I16Clamp: - Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP; + Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R; break; case NVPTXISD::Suld2DArrayV4I32Clamp: - Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP; + Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R; break; case NVPTXISD::Suld3DI8Clamp: - Opc = NVPTX::SULD_3D_I8_CLAMP; + Opc = NVPTX::SULD_3D_I8_CLAMP_R; break; case NVPTXISD::Suld3DI16Clamp: - Opc = NVPTX::SULD_3D_I16_CLAMP; + Opc = NVPTX::SULD_3D_I16_CLAMP_R; break; case NVPTXISD::Suld3DI32Clamp: - Opc = NVPTX::SULD_3D_I32_CLAMP; + Opc = NVPTX::SULD_3D_I32_CLAMP_R; break; case NVPTXISD::Suld3DI64Clamp: - Opc = NVPTX::SULD_3D_I64_CLAMP; + Opc = NVPTX::SULD_3D_I64_CLAMP_R; break; case NVPTXISD::Suld3DV2I8Clamp: - Opc = NVPTX::SULD_3D_V2I8_CLAMP; + Opc = NVPTX::SULD_3D_V2I8_CLAMP_R; break; case NVPTXISD::Suld3DV2I16Clamp: - Opc = NVPTX::SULD_3D_V2I16_CLAMP; + Opc = NVPTX::SULD_3D_V2I16_CLAMP_R; break; case NVPTXISD::Suld3DV2I32Clamp: - Opc = NVPTX::SULD_3D_V2I32_CLAMP; + Opc = NVPTX::SULD_3D_V2I32_CLAMP_R; break; case NVPTXISD::Suld3DV2I64Clamp: - Opc = NVPTX::SULD_3D_V2I64_CLAMP; + Opc = NVPTX::SULD_3D_V2I64_CLAMP_R; break; case NVPTXISD::Suld3DV4I8Clamp: - Opc = NVPTX::SULD_3D_V4I8_CLAMP; + Opc = NVPTX::SULD_3D_V4I8_CLAMP_R; break; case NVPTXISD::Suld3DV4I16Clamp: - Opc = NVPTX::SULD_3D_V4I16_CLAMP; + Opc = NVPTX::SULD_3D_V4I16_CLAMP_R; break; case NVPTXISD::Suld3DV4I32Clamp: - Opc = NVPTX::SULD_3D_V4I32_CLAMP; + Opc = NVPTX::SULD_3D_V4I32_CLAMP_R; break; case NVPTXISD::Suld1DI8Trap: - Opc = NVPTX::SULD_1D_I8_TRAP; + Opc = NVPTX::SULD_1D_I8_TRAP_R; break; case NVPTXISD::Suld1DI16Trap: - Opc = NVPTX::SULD_1D_I16_TRAP; + Opc = NVPTX::SULD_1D_I16_TRAP_R; break; case NVPTXISD::Suld1DI32Trap: - Opc = NVPTX::SULD_1D_I32_TRAP; + Opc = NVPTX::SULD_1D_I32_TRAP_R; break; case NVPTXISD::Suld1DI64Trap: - Opc = NVPTX::SULD_1D_I64_TRAP; + Opc = NVPTX::SULD_1D_I64_TRAP_R; break; case NVPTXISD::Suld1DV2I8Trap: - Opc = NVPTX::SULD_1D_V2I8_TRAP; + Opc = NVPTX::SULD_1D_V2I8_TRAP_R; break; case NVPTXISD::Suld1DV2I16Trap: - Opc = NVPTX::SULD_1D_V2I16_TRAP; + Opc = NVPTX::SULD_1D_V2I16_TRAP_R; break; case NVPTXISD::Suld1DV2I32Trap: - Opc = NVPTX::SULD_1D_V2I32_TRAP; + Opc = NVPTX::SULD_1D_V2I32_TRAP_R; break; case NVPTXISD::Suld1DV2I64Trap: - Opc = NVPTX::SULD_1D_V2I64_TRAP; + Opc = NVPTX::SULD_1D_V2I64_TRAP_R; break; case NVPTXISD::Suld1DV4I8Trap: - Opc = NVPTX::SULD_1D_V4I8_TRAP; + Opc = NVPTX::SULD_1D_V4I8_TRAP_R; break; case NVPTXISD::Suld1DV4I16Trap: - Opc = NVPTX::SULD_1D_V4I16_TRAP; + Opc = NVPTX::SULD_1D_V4I16_TRAP_R; break; case NVPTXISD::Suld1DV4I32Trap: - Opc = NVPTX::SULD_1D_V4I32_TRAP; + Opc = NVPTX::SULD_1D_V4I32_TRAP_R; break; case NVPTXISD::Suld1DArrayI8Trap: - Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP; + Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R; break; case NVPTXISD::Suld1DArrayI16Trap: - Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP; + Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R; break; case NVPTXISD::Suld1DArrayI32Trap: - Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP; + Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R; break; case NVPTXISD::Suld1DArrayI64Trap: - Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP; + Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R; break; case NVPTXISD::Suld1DArrayV2I8Trap: - Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP; + Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R; break; case NVPTXISD::Suld1DArrayV2I16Trap: - Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP; + Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R; break; case NVPTXISD::Suld1DArrayV2I32Trap: - Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP; + Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R; break; case NVPTXISD::Suld1DArrayV2I64Trap: - Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP; + Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R; break; case NVPTXISD::Suld1DArrayV4I8Trap: - Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP; + Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R; break; case NVPTXISD::Suld1DArrayV4I16Trap: - Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP; + Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R; break; case NVPTXISD::Suld1DArrayV4I32Trap: - Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP; + Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R; break; case NVPTXISD::Suld2DI8Trap: - Opc = NVPTX::SULD_2D_I8_TRAP; + Opc = NVPTX::SULD_2D_I8_TRAP_R; break; case NVPTXISD::Suld2DI16Trap: - Opc = NVPTX::SULD_2D_I16_TRAP; + Opc = NVPTX::SULD_2D_I16_TRAP_R; break; case NVPTXISD::Suld2DI32Trap: - Opc = NVPTX::SULD_2D_I32_TRAP; + Opc = NVPTX::SULD_2D_I32_TRAP_R; break; case NVPTXISD::Suld2DI64Trap: - Opc = NVPTX::SULD_2D_I64_TRAP; + Opc = NVPTX::SULD_2D_I64_TRAP_R; break; case NVPTXISD::Suld2DV2I8Trap: - Opc = NVPTX::SULD_2D_V2I8_TRAP; + Opc = NVPTX::SULD_2D_V2I8_TRAP_R; break; case NVPTXISD::Suld2DV2I16Trap: - Opc = NVPTX::SULD_2D_V2I16_TRAP; + Opc = NVPTX::SULD_2D_V2I16_TRAP_R; break; case NVPTXISD::Suld2DV2I32Trap: - Opc = NVPTX::SULD_2D_V2I32_TRAP; + Opc = NVPTX::SULD_2D_V2I32_TRAP_R; break; case NVPTXISD::Suld2DV2I64Trap: - Opc = NVPTX::SULD_2D_V2I64_TRAP; + Opc = NVPTX::SULD_2D_V2I64_TRAP_R; break; case NVPTXISD::Suld2DV4I8Trap: - Opc = NVPTX::SULD_2D_V4I8_TRAP; + Opc = NVPTX::SULD_2D_V4I8_TRAP_R; break; case NVPTXISD::Suld2DV4I16Trap: - Opc = NVPTX::SULD_2D_V4I16_TRAP; + Opc = NVPTX::SULD_2D_V4I16_TRAP_R; break; case NVPTXISD::Suld2DV4I32Trap: - Opc = NVPTX::SULD_2D_V4I32_TRAP; + Opc = NVPTX::SULD_2D_V4I32_TRAP_R; break; case NVPTXISD::Suld2DArrayI8Trap: - Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP; + Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R; break; case NVPTXISD::Suld2DArrayI16Trap: - Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP; + Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R; break; case NVPTXISD::Suld2DArrayI32Trap: - Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP; + Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R; break; case NVPTXISD::Suld2DArrayI64Trap: - Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP; + Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R; break; case NVPTXISD::Suld2DArrayV2I8Trap: - Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP; + Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R; break; case NVPTXISD::Suld2DArrayV2I16Trap: - Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP; + Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R; break; case NVPTXISD::Suld2DArrayV2I32Trap: - Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP; + Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R; break; case NVPTXISD::Suld2DArrayV2I64Trap: - Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP; + Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R; break; case NVPTXISD::Suld2DArrayV4I8Trap: - Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP; + Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R; break; case NVPTXISD::Suld2DArrayV4I16Trap: - Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP; + Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R; break; case NVPTXISD::Suld2DArrayV4I32Trap: - Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP; + Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R; break; case NVPTXISD::Suld3DI8Trap: - Opc = NVPTX::SULD_3D_I8_TRAP; + Opc = NVPTX::SULD_3D_I8_TRAP_R; break; case NVPTXISD::Suld3DI16Trap: - Opc = NVPTX::SULD_3D_I16_TRAP; + Opc = NVPTX::SULD_3D_I16_TRAP_R; break; case NVPTXISD::Suld3DI32Trap: - Opc = NVPTX::SULD_3D_I32_TRAP; + Opc = NVPTX::SULD_3D_I32_TRAP_R; break; case NVPTXISD::Suld3DI64Trap: - Opc = NVPTX::SULD_3D_I64_TRAP; + Opc = NVPTX::SULD_3D_I64_TRAP_R; break; case NVPTXISD::Suld3DV2I8Trap: - Opc = NVPTX::SULD_3D_V2I8_TRAP; + Opc = NVPTX::SULD_3D_V2I8_TRAP_R; break; case NVPTXISD::Suld3DV2I16Trap: - Opc = NVPTX::SULD_3D_V2I16_TRAP; + Opc = NVPTX::SULD_3D_V2I16_TRAP_R; break; case NVPTXISD::Suld3DV2I32Trap: - Opc = NVPTX::SULD_3D_V2I32_TRAP; + Opc = NVPTX::SULD_3D_V2I32_TRAP_R; break; case NVPTXISD::Suld3DV2I64Trap: - Opc = NVPTX::SULD_3D_V2I64_TRAP; + Opc = NVPTX::SULD_3D_V2I64_TRAP_R; break; case NVPTXISD::Suld3DV4I8Trap: - Opc = NVPTX::SULD_3D_V4I8_TRAP; + Opc = NVPTX::SULD_3D_V4I8_TRAP_R; break; case NVPTXISD::Suld3DV4I16Trap: - Opc = NVPTX::SULD_3D_V4I16_TRAP; + Opc = NVPTX::SULD_3D_V4I16_TRAP_R; break; case NVPTXISD::Suld3DV4I32Trap: - Opc = NVPTX::SULD_3D_V4I32_TRAP; + Opc = NVPTX::SULD_3D_V4I32_TRAP_R; break; case NVPTXISD::Suld1DI8Zero: - Opc = NVPTX::SULD_1D_I8_ZERO; + Opc = NVPTX::SULD_1D_I8_ZERO_R; break; case NVPTXISD::Suld1DI16Zero: - Opc = NVPTX::SULD_1D_I16_ZERO; + Opc = NVPTX::SULD_1D_I16_ZERO_R; break; case NVPTXISD::Suld1DI32Zero: - Opc = NVPTX::SULD_1D_I32_ZERO; + Opc = NVPTX::SULD_1D_I32_ZERO_R; break; case NVPTXISD::Suld1DI64Zero: - Opc = NVPTX::SULD_1D_I64_ZERO; + Opc = NVPTX::SULD_1D_I64_ZERO_R; break; case NVPTXISD::Suld1DV2I8Zero: - Opc = NVPTX::SULD_1D_V2I8_ZERO; + Opc = NVPTX::SULD_1D_V2I8_ZERO_R; break; case NVPTXISD::Suld1DV2I16Zero: - Opc = NVPTX::SULD_1D_V2I16_ZERO; + Opc = NVPTX::SULD_1D_V2I16_ZERO_R; break; case NVPTXISD::Suld1DV2I32Zero: - Opc = NVPTX::SULD_1D_V2I32_ZERO; + Opc = NVPTX::SULD_1D_V2I32_ZERO_R; break; case NVPTXISD::Suld1DV2I64Zero: - Opc = NVPTX::SULD_1D_V2I64_ZERO; + Opc = NVPTX::SULD_1D_V2I64_ZERO_R; break; case NVPTXISD::Suld1DV4I8Zero: - Opc = NVPTX::SULD_1D_V4I8_ZERO; + Opc = NVPTX::SULD_1D_V4I8_ZERO_R; break; case NVPTXISD::Suld1DV4I16Zero: - Opc = NVPTX::SULD_1D_V4I16_ZERO; + Opc = NVPTX::SULD_1D_V4I16_ZERO_R; break; case NVPTXISD::Suld1DV4I32Zero: - Opc = NVPTX::SULD_1D_V4I32_ZERO; + Opc = NVPTX::SULD_1D_V4I32_ZERO_R; break; case NVPTXISD::Suld1DArrayI8Zero: - Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO; + Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R; break; case NVPTXISD::Suld1DArrayI16Zero: - Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO; + Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R; break; case NVPTXISD::Suld1DArrayI32Zero: - Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO; + Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R; break; case NVPTXISD::Suld1DArrayI64Zero: - Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO; + Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R; break; case NVPTXISD::Suld1DArrayV2I8Zero: - Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO; + Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R; break; case NVPTXISD::Suld1DArrayV2I16Zero: - Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO; + Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R; break; case NVPTXISD::Suld1DArrayV2I32Zero: - Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO; + Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R; break; case NVPTXISD::Suld1DArrayV2I64Zero: - Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO; + Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R; break; case NVPTXISD::Suld1DArrayV4I8Zero: - Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO; + Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R; break; case NVPTXISD::Suld1DArrayV4I16Zero: - Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO; + Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R; break; case NVPTXISD::Suld1DArrayV4I32Zero: - Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO; + Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R; break; case NVPTXISD::Suld2DI8Zero: - Opc = NVPTX::SULD_2D_I8_ZERO; + Opc = NVPTX::SULD_2D_I8_ZERO_R; break; case NVPTXISD::Suld2DI16Zero: - Opc = NVPTX::SULD_2D_I16_ZERO; + Opc = NVPTX::SULD_2D_I16_ZERO_R; break; case NVPTXISD::Suld2DI32Zero: - Opc = NVPTX::SULD_2D_I32_ZERO; + Opc = NVPTX::SULD_2D_I32_ZERO_R; break; case NVPTXISD::Suld2DI64Zero: - Opc = NVPTX::SULD_2D_I64_ZERO; + Opc = NVPTX::SULD_2D_I64_ZERO_R; break; case NVPTXISD::Suld2DV2I8Zero: - Opc = NVPTX::SULD_2D_V2I8_ZERO; + Opc = NVPTX::SULD_2D_V2I8_ZERO_R; break; case NVPTXISD::Suld2DV2I16Zero: - Opc = NVPTX::SULD_2D_V2I16_ZERO; + Opc = NVPTX::SULD_2D_V2I16_ZERO_R; break; case NVPTXISD::Suld2DV2I32Zero: - Opc = NVPTX::SULD_2D_V2I32_ZERO; + Opc = NVPTX::SULD_2D_V2I32_ZERO_R; break; case NVPTXISD::Suld2DV2I64Zero: - Opc = NVPTX::SULD_2D_V2I64_ZERO; + Opc = NVPTX::SULD_2D_V2I64_ZERO_R; break; case NVPTXISD::Suld2DV4I8Zero: - Opc = NVPTX::SULD_2D_V4I8_ZERO; + Opc = NVPTX::SULD_2D_V4I8_ZERO_R; break; case NVPTXISD::Suld2DV4I16Zero: - Opc = NVPTX::SULD_2D_V4I16_ZERO; + Opc = NVPTX::SULD_2D_V4I16_ZERO_R; break; case NVPTXISD::Suld2DV4I32Zero: - Opc = NVPTX::SULD_2D_V4I32_ZERO; + Opc = NVPTX::SULD_2D_V4I32_ZERO_R; break; case NVPTXISD::Suld2DArrayI8Zero: - Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO; + Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R; break; case NVPTXISD::Suld2DArrayI16Zero: - Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO; + Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R; break; case NVPTXISD::Suld2DArrayI32Zero: - Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO; + Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R; break; case NVPTXISD::Suld2DArrayI64Zero: - Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO; + Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R; break; case NVPTXISD::Suld2DArrayV2I8Zero: - Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO; + Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R; break; case NVPTXISD::Suld2DArrayV2I16Zero: - Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO; + Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R; break; case NVPTXISD::Suld2DArrayV2I32Zero: - Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO; + Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R; break; case NVPTXISD::Suld2DArrayV2I64Zero: - Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO; + Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R; break; case NVPTXISD::Suld2DArrayV4I8Zero: - Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO; + Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R; break; case NVPTXISD::Suld2DArrayV4I16Zero: - Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO; + Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R; break; case NVPTXISD::Suld2DArrayV4I32Zero: - Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO; + Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R; break; case NVPTXISD::Suld3DI8Zero: - Opc = NVPTX::SULD_3D_I8_ZERO; + Opc = NVPTX::SULD_3D_I8_ZERO_R; break; case NVPTXISD::Suld3DI16Zero: - Opc = NVPTX::SULD_3D_I16_ZERO; + Opc = NVPTX::SULD_3D_I16_ZERO_R; break; case NVPTXISD::Suld3DI32Zero: - Opc = NVPTX::SULD_3D_I32_ZERO; + Opc = NVPTX::SULD_3D_I32_ZERO_R; break; case NVPTXISD::Suld3DI64Zero: - Opc = NVPTX::SULD_3D_I64_ZERO; + Opc = NVPTX::SULD_3D_I64_ZERO_R; break; case NVPTXISD::Suld3DV2I8Zero: - Opc = NVPTX::SULD_3D_V2I8_ZERO; + Opc = NVPTX::SULD_3D_V2I8_ZERO_R; break; case NVPTXISD::Suld3DV2I16Zero: - Opc = NVPTX::SULD_3D_V2I16_ZERO; + Opc = NVPTX::SULD_3D_V2I16_ZERO_R; break; case NVPTXISD::Suld3DV2I32Zero: - Opc = NVPTX::SULD_3D_V2I32_ZERO; + Opc = NVPTX::SULD_3D_V2I32_ZERO_R; break; case NVPTXISD::Suld3DV2I64Zero: - Opc = NVPTX::SULD_3D_V2I64_ZERO; + Opc = NVPTX::SULD_3D_V2I64_ZERO_R; break; case NVPTXISD::Suld3DV4I8Zero: - Opc = NVPTX::SULD_3D_V4I8_ZERO; + Opc = NVPTX::SULD_3D_V4I8_ZERO_R; break; case NVPTXISD::Suld3DV4I16Zero: - Opc = NVPTX::SULD_3D_V4I16_ZERO; + Opc = NVPTX::SULD_3D_V4I16_ZERO_R; break; case NVPTXISD::Suld3DV4I32Zero: - Opc = NVPTX::SULD_3D_V4I32_ZERO; + Opc = NVPTX::SULD_3D_V4I32_ZERO_R; break; } diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -2464,2303 +2464,1563 @@ // texmode_independent let IsTex = true, IsTexModeUnified = false in { // Texture fetch instructions using handles -def TEX_1D_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), - "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", - []>; -def TEX_1D_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), - "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", - []>; -def TEX_1D_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod), - "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x\\}], $lod;", - []>; -def TEX_1D_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_1D_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), - "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", - []>; -def TEX_1D_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), - "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", - []>; -def TEX_1D_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x\\}], $lod;", - []>; -def TEX_1D_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_1D_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), - "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", - []>; -def TEX_1D_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), - "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", - []>; -def TEX_1D_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x\\}], $lod;", - []>; -def TEX_1D_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; - -def TEX_1D_ARRAY_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}];", - []>; -def TEX_1D_ARRAY_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), - "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}];", - []>; -def TEX_1D_ARRAY_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}], $lod;", - []>; -def TEX_1D_ARRAY_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_1D_ARRAY_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}];", - []>; -def TEX_1D_ARRAY_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), - "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}];", - []>; -def TEX_1D_ARRAY_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}], $lod;", - []>; -def TEX_1D_ARRAY_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_1D_ARRAY_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}];", - []>; -def TEX_1D_ARRAY_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), - "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}];", - []>; -def TEX_1D_ARRAY_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}], $lod;", - []>; -def TEX_1D_ARRAY_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_2D_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TEX_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TEX_2D_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$lod), - "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}], $lod;", - []>; -def TEX_2D_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; -def TEX_2D_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TEX_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TEX_2D_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$lod), - "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}], $lod;", - []>; -def TEX_2D_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; -def TEX_2D_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TEX_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TEX_2D_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$lod), - "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}], $lod;", - []>; -def TEX_2D_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; +class TEX_1D_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", + []>; + +multiclass TEX_1D { + def _RR : TEX_1D_base; + def _RI : TEX_1D_base; + def _IR : TEX_1D_base; + def _II : TEX_1D_base; +} -def TEX_2D_ARRAY_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, - Int32Regs:$y), - "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_2D_ARRAY_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y), - "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_2D_ARRAY_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, Float32Regs:$lod), - "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", - []>; -def TEX_2D_ARRAY_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; -def TEX_2D_ARRAY_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, - Int32Regs:$y), - "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_2D_ARRAY_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y), - "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_2D_ARRAY_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, Float32Regs:$lod), - "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", - []>; -def TEX_2D_ARRAY_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; -def TEX_2D_ARRAY_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, - Int32Regs:$y), - "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_2D_ARRAY_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y), - "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_2D_ARRAY_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, Float32Regs:$lod), - "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", - []>; -def TEX_2D_ARRAY_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; +defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_1D_LEVEL_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;", + []>; + +multiclass TEX_1D_LEVEL { + def _RR : TEX_1D_LEVEL_base; + def _RI : TEX_1D_LEVEL_base; + def _IR : TEX_1D_LEVEL_base; + def _II : TEX_1D_LEVEL_base; +} -def TEX_3D_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$z), - "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_3D_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z), - "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_3D_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, Float32Regs:$lod), - "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_3D_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$gradx2, Float32Regs:$grady0, - Float32Regs:$grady1, Float32Regs:$grady2), - "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], " - "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " - "\\{$grady0, $grady1, $grady2, $grady2\\};", - []>; -def TEX_3D_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$z), - "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_3D_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z), - "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_3D_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, Float32Regs:$lod), - "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_3D_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$gradx2, Float32Regs:$grady0, - Float32Regs:$grady1, Float32Regs:$grady2), - "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], " - "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " - "\\{$grady0, $grady1, $grady2, $grady2\\};", - []>; -def TEX_3D_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$z), - "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_3D_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z), - "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_3D_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, Float32Regs:$lod), - "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_3D_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$gradx2, Float32Regs:$grady0, - Float32Regs:$grady1, Float32Regs:$grady2), - "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], " - "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " - "\\{$grady0, $grady1, $grady2, $grady2\\};", - []>; +defm TEX_1D_F32_F32_LEVEL : + TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_1D_S32_F32_LEVEL : + TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_1D_U32_F32_LEVEL : + TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_1D_GRAD_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}]," + " \\{$gradx\\}, \\{$grady\\};", + []>; + +multiclass TEX_1D_GRAD { + def _RR : TEX_1D_GRAD_base; + def _RI : TEX_1D_GRAD_base; + def _IR : TEX_1D_GRAD_base; + def _II : TEX_1D_GRAD_base; +} -def TEX_CUBE_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_CUBE_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_CUBE_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_CUBE_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_CUBE_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_CUBE_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", - []>; +defm TEX_1D_F32_F32_GRAD + : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_1D_S32_F32_GRAD + : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_1D_U32_F32_GRAD + : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_1D_ARRAY_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins Int32Regs:$l, intype:$x)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];", + []>; + +multiclass TEX_1D_ARRAY { + def _RR : TEX_1D_ARRAY_base; + def _RI : TEX_1D_ARRAY_base; + def _IR : TEX_1D_ARRAY_base; + def _II : TEX_1D_ARRAY_base; +} -def TEX_CUBE_ARRAY_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $z\\}];", - []>; -def TEX_CUBE_ARRAY_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;", - []>; -def TEX_CUBE_ARRAY_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $z\\}];", - []>; -def TEX_CUBE_ARRAY_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;", - []>; -def TEX_CUBE_ARRAY_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $z\\}];", - []>; -def TEX_CUBE_ARRAY_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;", - []>; +defm TEX_1D_ARRAY_F32_F32 + : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_1D_ARRAY_F32_S32 + : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_1D_ARRAY_S32_S32 + : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_1D_ARRAY_S32_F32 + : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_1D_ARRAY_U32_S32 + : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_1D_ARRAY_U32_F32 + : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_1D_ARRAY_LEVEL_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$l, $x\\}], $lod;", + []>; + +multiclass TEX_1D_ARRAY_LEVEL { + def _RR : TEX_1D_ARRAY_LEVEL_base; + def _RI : TEX_1D_ARRAY_LEVEL_base; + def _IR : TEX_1D_ARRAY_LEVEL_base; + def _II : TEX_1D_ARRAY_LEVEL_base; +} -def TLD4_R_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, - Float32Regs:$v2, Float32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_G_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, - Float32Regs:$v2, Float32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_B_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, - Float32Regs:$v2, Float32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_A_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, - Float32Regs:$v2, Float32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_R_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_G_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_B_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_A_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_R_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_G_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_B_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_A_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; +defm TEX_1D_ARRAY_F32_F32_LEVEL + : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_1D_ARRAY_S32_F32_LEVEL + : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_1D_ARRAY_U32_F32_LEVEL + : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_1D_ARRAY_GRAD_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins Int32Regs:$l, intype:$x, + intype:$gradx, intype:$grady)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}]," + " \\{$gradx\\}, \\{$grady\\};", + []>; + +multiclass TEX_1D_ARRAY_GRAD { + def _RR : TEX_1D_ARRAY_GRAD_base; + def _RI : TEX_1D_ARRAY_GRAD_base; + def _IR : TEX_1D_ARRAY_GRAD_base; + def _II : TEX_1D_ARRAY_GRAD_base; } +defm TEX_1D_ARRAY_F32_F32_GRAD + : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_1D_ARRAY_S32_F32_GRAD + : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_1D_ARRAY_U32_F32_GRAD + : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_2D_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$y)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];", + []>; + +multiclass TEX_2D { + def _RR : TEX_2D_base; + def _RI : TEX_2D_base; + def _IR : TEX_2D_base; + def _II : TEX_2D_base; +} -// texmode_unified -let IsTex = true, IsTexModeUnified = true in { -// Texture fetch instructions using handles -def TEX_UNIFIED_1D_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x), - "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", - []>; -def TEX_UNIFIED_1D_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x), - "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", - []>; -def TEX_UNIFIED_1D_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod), - "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x\\}], $lod;", - []>; -def TEX_UNIFIED_1D_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_UNIFIED_1D_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x), - "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", - []>; -def TEX_UNIFIED_1D_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x), - "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", - []>; -def TEX_UNIFIED_1D_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x\\}], $lod;", - []>; -def TEX_UNIFIED_1D_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_UNIFIED_1D_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x), - "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", - []>; -def TEX_UNIFIED_1D_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x), - "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", - []>; -def TEX_UNIFIED_1D_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x\\}], $lod;", - []>; -def TEX_UNIFIED_1D_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; +defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_2D_LEVEL_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$x, $y\\}], $lod;", + []>; + +multiclass TEX_2D_LEVEL { + def _RR : TEX_2D_LEVEL_base; + def _RI : TEX_2D_LEVEL_base; + def _IR : TEX_2D_LEVEL_base; + def _II : TEX_2D_LEVEL_base; +} -def TEX_UNIFIED_1D_ARRAY_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), - "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}];", - []>; -def TEX_UNIFIED_1D_ARRAY_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), - "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}];", - []>; -def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}], $lod;", - []>; -def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_UNIFIED_1D_ARRAY_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), - "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}];", - []>; -def TEX_UNIFIED_1D_ARRAY_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), - "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}];", - []>; -def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}], $lod;", - []>; -def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_UNIFIED_1D_ARRAY_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), - "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}];", - []>; -def TEX_UNIFIED_1D_ARRAY_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), - "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}];", - []>; -def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}], $lod;", - []>; -def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; +defm TEX_2D_F32_F32_LEVEL : + TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_2D_S32_F32_LEVEL : + TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_2D_U32_F32_LEVEL : + TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_2D_GRAD_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$y, + intype:$gradx0, intype:$gradx1, + intype:$grady0, intype:$grady1)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}]," + " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", + []>; + +multiclass TEX_2D_GRAD { + def _RR : TEX_2D_GRAD_base; + def _RI : TEX_2D_GRAD_base; + def _IR : TEX_2D_GRAD_base; + def _II : TEX_2D_GRAD_base; +} -def TEX_UNIFIED_2D_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), - "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TEX_UNIFIED_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TEX_UNIFIED_2D_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$lod), - "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}], $lod;", - []>; -def TEX_UNIFIED_2D_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; -def TEX_UNIFIED_2D_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), - "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TEX_UNIFIED_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TEX_UNIFIED_2D_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$lod), - "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}], $lod;", - []>; -def TEX_UNIFIED_2D_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; -def TEX_UNIFIED_2D_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), - "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TEX_UNIFIED_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TEX_UNIFIED_2D_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$lod), - "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}], $lod;", - []>; -def TEX_UNIFIED_2D_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; +defm TEX_2D_F32_F32_GRAD : + TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_2D_S32_F32_GRAD : + TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_2D_U32_F32_GRAD : + TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_2D_ARRAY_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$l, $x, $y, $y\\}];", + []>; + +multiclass TEX_2D_ARRAY { + def _RR : TEX_2D_ARRAY_base; + def _RI : TEX_2D_ARRAY_base; + def _IR : TEX_2D_ARRAY_base; + def _II : TEX_2D_ARRAY_base; +} -def TEX_UNIFIED_2D_ARRAY_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, - Int32Regs:$y), - "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_UNIFIED_2D_ARRAY_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y), - "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, Float32Regs:$lod), - "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}], $lod;", - []>; -def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; -def TEX_UNIFIED_2D_ARRAY_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, - Int32Regs:$y), - "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_UNIFIED_2D_ARRAY_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y), - "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, Float32Regs:$lod), - "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}], $lod;", - []>; -def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; -def TEX_UNIFIED_2D_ARRAY_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, - Int32Regs:$y), - "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_UNIFIED_2D_ARRAY_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y), - "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, Float32Regs:$lod), - "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}], $lod;", - []>; -def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; +defm TEX_2D_ARRAY_F32_F32 + : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_2D_ARRAY_F32_S32 + : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_2D_ARRAY_S32_S32 + : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_2D_ARRAY_S32_F32 + : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_2D_ARRAY_U32_S32 + : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_2D_ARRAY_U32_F32 + : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_2D_ARRAY_LEVEL_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, + intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;", + []>; + +multiclass TEX_2D_ARRAY_LEVEL { + def _RR : TEX_2D_ARRAY_LEVEL_base; + def _RI : TEX_2D_ARRAY_LEVEL_base; + def _IR : TEX_2D_ARRAY_LEVEL_base; + def _II : TEX_2D_ARRAY_LEVEL_base; +} -def TEX_UNIFIED_3D_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$z), - "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_3D_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z), - "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_3D_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, Float32Regs:$lod), - "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_UNIFIED_3D_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$gradx2, Float32Regs:$grady0, - Float32Regs:$grady1, Float32Regs:$grady2), - "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], " - "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " - "\\{$grady0, $grady1, $grady2, $grady2\\};", - []>; -def TEX_UNIFIED_3D_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$z), - "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_3D_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z), - "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_3D_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, Float32Regs:$lod), - "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_UNIFIED_3D_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$gradx2, Float32Regs:$grady0, - Float32Regs:$grady1, Float32Regs:$grady2), - "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], " - "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " - "\\{$grady0, $grady1, $grady2, $grady2\\};", - []>; -def TEX_UNIFIED_3D_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$z), - "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_3D_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z), - "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_3D_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, Float32Regs:$lod), - "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_UNIFIED_3D_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$gradx2, Float32Regs:$grady0, - Float32Regs:$grady1, Float32Regs:$grady2), - "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], " - "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " - "\\{$grady0, $grady1, $grady2, $grady2\\};", - []>; +defm TEX_2D_ARRAY_F32_F32_LEVEL + : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_2D_ARRAY_S32_F32_LEVEL + : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_2D_ARRAY_U32_F32_LEVEL + : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_2D_ARRAY_GRAD_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, + intype:$gradx0, intype:$gradx1, + intype:$grady0, intype:$grady1)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$l, $x, $y, $y\\}]," + " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", + []>; + +multiclass TEX_2D_ARRAY_GRAD { + def _RR : TEX_2D_ARRAY_GRAD_base; + def _RI : TEX_2D_ARRAY_GRAD_base; + def _IR : TEX_2D_ARRAY_GRAD_base; + def _II : TEX_2D_ARRAY_GRAD_base; +} -def TEX_UNIFIED_CUBE_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_CUBE_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_UNIFIED_CUBE_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_CUBE_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_UNIFIED_CUBE_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_CUBE_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], $lod;", - []>; +defm TEX_2D_ARRAY_F32_F32_GRAD + : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_2D_ARRAY_S32_F32_GRAD + : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_2D_ARRAY_U32_F32_GRAD + : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_3D_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$y, intype:$z)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$x, $y, $z, $z\\}];", + []>; + +multiclass TEX_3D { + def _RR : TEX_3D_base; + def _RI : TEX_3D_base; + def _IR : TEX_3D_base; + def _II : TEX_3D_base; +} -def TEX_UNIFIED_CUBE_ARRAY_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $z\\}];", - []>; -def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $z\\}], $lod;", - []>; -def TEX_UNIFIED_CUBE_ARRAY_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $z\\}];", - []>; -def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $z\\}], $lod;", - []>; -def TEX_UNIFIED_CUBE_ARRAY_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $z\\}];", - []>; -def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $z\\}], $lod;", - []>; +defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_3D_LEVEL_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$y, intype:$z, + intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;", + []>; + +multiclass TEX_3D_LEVEL { + def _RR : TEX_3D_LEVEL_base; + def _RI : TEX_3D_LEVEL_base; + def _IR : TEX_3D_LEVEL_base; + def _II : TEX_3D_LEVEL_base; +} -def TLD4_UNIFIED_R_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, - Float32Regs:$v2, Float32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_G_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, - Float32Regs:$v2, Float32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_B_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, - Float32Regs:$v2, Float32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_A_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, - Float32Regs:$v2, Float32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_R_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_G_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_B_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_A_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_R_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_G_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_B_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_A_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; +defm TEX_3D_F32_F32_LEVEL + : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_3D_S32_F32_LEVEL + : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_3D_U32_F32_LEVEL + : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_3D_GRAD_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$y, intype:$z, + intype :$gradx0, intype:$gradx1, + intype:$gradx2, intype:$grady0, + intype:$grady1, intype:$grady2)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$x, $y, $z, $z\\}]," + " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," + " \\{$grady0, $grady1, $grady2, $grady2\\};", + []>; + +multiclass TEX_3D_GRAD { + def _RR : TEX_3D_GRAD_base; + def _RI : TEX_3D_GRAD_base; + def _IR : TEX_3D_GRAD_base; + def _II : TEX_3D_GRAD_base; } +defm TEX_3D_F32_F32_GRAD + : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_3D_S32_F32_GRAD + : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_3D_U32_F32_GRAD + : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_CUBE_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$y, intype:$z)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$x, $y, $z, $z\\}];", + []>; + +multiclass TEX_CUBE { + def _RR : TEX_CUBE_base; + def _RI : TEX_CUBE_base; + def _IR : TEX_CUBE_base; + def _II : TEX_CUBE_base; +} +defm TEX_CUBE_F32_F32 + : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_CUBE_S32_F32 + : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_CUBE_U32_F32 + : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_CUBE_LEVEL_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$y, intype:$z, + intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;", + []>; + +multiclass TEX_CUBE_LEVEL { + def _RR : TEX_CUBE_LEVEL_base; + def _RI : TEX_CUBE_LEVEL_base; + def _IR : TEX_CUBE_LEVEL_base; + def _II : TEX_CUBE_LEVEL_base; +} -//=== Surface load instructions -// .clamp variant -let IsSuld = true in { -def SULD_1D_I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];", - []>; +defm TEX_CUBE_F32_F32_LEVEL + : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_CUBE_S32_F32_LEVEL + : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_CUBE_U32_F32_LEVEL + : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_CUBE_ARRAY_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, + intype:$z)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$l, $x, $y, $z\\}];", + []>; + +multiclass TEX_CUBE_ARRAY { + def _RR : TEX_CUBE_ARRAY_base; + def _RI : TEX_CUBE_ARRAY_base; + def _IR : TEX_CUBE_ARRAY_base; + def _II : TEX_CUBE_ARRAY_base; +} -def SULD_1D_ARRAY_I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; +defm TEX_CUBE_ARRAY_F32_F32 + : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_CUBE_ARRAY_S32_F32 + : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_CUBE_ARRAY_U32_F32 + : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_CUBE_ARRAY_LEVEL_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, + intype:$z, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;", + []>; + +multiclass TEX_CUBE_ARRAY_LEVEL { + def _RR : TEX_CUBE_ARRAY_LEVEL_base; + def _RI : TEX_CUBE_ARRAY_LEVEL_base; + def _IR : TEX_CUBE_ARRAY_LEVEL_base; + def _II : TEX_CUBE_ARRAY_LEVEL_base; +} -def SULD_2D_I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; +defm TEX_CUBE_ARRAY_F32_F32_LEVEL + : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32", + Float32Regs, Float32Regs>; +defm TEX_CUBE_ARRAY_S32_F32_LEVEL + : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32", + Int32Regs, Float32Regs>; +defm TEX_CUBE_ARRAY_U32_F32_LEVEL + : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32", + Int32Regs, Float32Regs>; + +class TLD4_2D_base + : NVPTXInst<(outs outtype:$v0, outtype:$v1, + outtype:$v2, outtype:$v3), + !con(texsamp, (ins intype:$x, intype:$y)), + inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];", + []>; + +multiclass TLD4_2D { + def _RR : TLD4_2D_base; + def _RI : TLD4_2D_base; + def _IR : TLD4_2D_base; + def _II : TLD4_2D_base; +} -def SULD_2D_ARRAY_I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; +defm TLD4_R_2D_F32_F32 + : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TLD4_G_2D_F32_F32 + : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TLD4_B_2D_F32_F32 + : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TLD4_A_2D_F32_F32 + : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>; + +defm TLD4_R_2D_S32_F32 + : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TLD4_G_2D_S32_F32 + : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TLD4_B_2D_S32_F32 + : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TLD4_A_2D_S32_F32 + : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>; + +defm TLD4_R_2D_U32_F32 + : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>; +defm TLD4_G_2D_U32_F32 + : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>; +defm TLD4_B_2D_U32_F32 + : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>; +defm TLD4_A_2D_U32_F32 + : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>; -def SULD_3D_I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; } -let IsSuld = 2 in { -def SULD_1D_V2I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_ARRAY_V2I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; +// texmode_unified +let IsTex = true, IsTexModeUnified = true in { +// Texture fetch instructions using handles -def SULD_2D_V2I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; +class TEX_UNIFIED_1D_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", + []>; + +multiclass TEX_UNIFIED_1D { + def _R : TEX_UNIFIED_1D_base; + def _I : TEX_UNIFIED_1D_base; +} -def SULD_2D_ARRAY_V2I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; +defm TEX_UNIFIED_1D_F32_S32 + : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_UNIFIED_1D_F32_F32 + : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_S32_S32 + : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_1D_S32_F32 + : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_U32_S32 + : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_1D_U32_F32 + : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_1D_LEVEL_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;", + []>; + +multiclass TEX_UNIFIED_1D_LEVEL { + def _R : TEX_UNIFIED_1D_LEVEL_base; + def _I : TEX_UNIFIED_1D_LEVEL_base; +} -def SULD_3D_V2I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; +defm TEX_UNIFIED_1D_F32_F32_LEVEL + : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_S32_F32_LEVEL + : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_U32_F32_LEVEL + : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_1D_GRAD_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; + +multiclass TEX_UNIFIED_1D_GRAD { + def _R : TEX_UNIFIED_1D_GRAD_base; + def _I : TEX_UNIFIED_1D_GRAD_base; } -let IsSuld = 3 in { -def SULD_1D_V4I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V4I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V4I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; +defm TEX_UNIFIED_1D_F32_F32_GRAD + : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_S32_F32_GRAD + : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_U32_F32_GRAD + : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_1D_ARRAY_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins Int32Regs:$l, intype:$x)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];", + []>; + +multiclass TEX_UNIFIED_1D_ARRAY { + def _R : TEX_UNIFIED_1D_ARRAY_base; + def _I : TEX_UNIFIED_1D_ARRAY_base; +} -def SULD_1D_ARRAY_V4I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V4I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V4I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; +defm TEX_UNIFIED_1D_ARRAY_F32_S32 + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_UNIFIED_1D_ARRAY_F32_F32 + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_ARRAY_S32_S32 + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_1D_ARRAY_S32_F32 + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_ARRAY_U32_S32 + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_1D_ARRAY_U32_F32 + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_1D_ARRAY_LEVEL_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;", + []>; + +multiclass TEX_UNIFIED_1D_ARRAY_LEVEL { + def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base; + def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base; +} -def SULD_2D_V4I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V4I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V4I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; +defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL + : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", + Float32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL + : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", + Int32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL + : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", + Int32Regs, Float32Regs>; + +class TEX_UNIFIED_1D_ARRAY_GRAD_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins Int32Regs:$l, intype:$x, + intype:$gradx, intype:$grady)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; + +multiclass TEX_UNIFIED_1D_ARRAY_GRAD { + def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base; + def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base; +} -def SULD_2D_ARRAY_V4I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V4I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V4I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; +defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD + : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", + Float32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD + : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", + Int32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD + : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", + Int32Regs, Float32Regs>; + +class TEX_UNIFIED_2D_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$y)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];", + []>; + +multiclass TEX_UNIFIED_2D { + def _R : TEX_UNIFIED_2D_base; + def _I : TEX_UNIFIED_2D_base; +} +defm TEX_UNIFIED_2D_F32_S32 + : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_UNIFIED_2D_F32_F32 + : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_S32_S32 + : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_2D_S32_F32 + : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_U32_S32 + : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_2D_U32_F32 + : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_2D_LEVEL_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$y, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;", + []>; + +multiclass TEX_UNIFIED_2D_LEVEL { + def _R : TEX_UNIFIED_2D_LEVEL_base; + def _I : TEX_UNIFIED_2D_LEVEL_base; +} -def SULD_3D_V4I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V4I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V4I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; +defm TEX_UNIFIED_2D_F32_F32_LEVEL + : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_S32_F32_LEVEL + : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_U32_F32_LEVEL + : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_2D_GRAD_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$y, + intype:$gradx0, intype:$gradx1, + intype:$grady0, intype:$grady1)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}]," + " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", + []>; +multiclass TEX_UNIFIED_2D_GRAD { + def _R : TEX_UNIFIED_2D_GRAD_base; + def _I : TEX_UNIFIED_2D_GRAD_base; } +defm TEX_UNIFIED_2D_F32_F32_GRAD + : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_S32_F32_GRAD + : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_U32_F32_GRAD + : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_2D_ARRAY_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];", + []>; +multiclass TEX_UNIFIED_2D_ARRAY { + def _R : TEX_UNIFIED_2D_ARRAY_base; + def _I : TEX_UNIFIED_2D_ARRAY_base; +} -// .trap variant -let IsSuld = true in { -def SULD_1D_I8_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I16_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I32_TRAP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I64_TRAP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];", - []>; +defm TEX_UNIFIED_2D_ARRAY_F32_S32 + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_UNIFIED_2D_ARRAY_F32_F32 + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_ARRAY_S32_S32 + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_2D_ARRAY_S32_F32 + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_ARRAY_U32_S32 + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_2D_ARRAY_U32_F32 + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_2D_ARRAY_LEVEL_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, + intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, \\{$l, $x, $y, $y\\}], $lod;", + []>; +multiclass TEX_UNIFIED_2D_ARRAY_LEVEL { + def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base; + def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base; +} -def SULD_1D_ARRAY_I8_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I16_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I32_TRAP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I64_TRAP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; +defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL + : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", + Float32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL + : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", + Int32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL + : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", + Int32Regs, Float32Regs>; + +class TEX_UNIFIED_2D_ARRAY_GRAD_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, + intype:$gradx0, intype:$gradx1, + intype:$grady0, intype:$grady1)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}]," + " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", + []>; +multiclass TEX_UNIFIED_2D_ARRAY_GRAD { + def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base; + def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base; +} -def SULD_2D_I8_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I16_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I32_TRAP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I64_TRAP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; +defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD + : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", + Float32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD + : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", + Int32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD + : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", + Int32Regs, Float32Regs>; + +class TEX_UNIFIED_3D_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$y, intype:$z)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];", + []>; +multiclass TEX_UNIFIED_3D { + def _R : TEX_UNIFIED_3D_base; + def _I : TEX_UNIFIED_3D_base; +} -def SULD_2D_ARRAY_I8_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I16_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I32_TRAP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I64_TRAP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; +defm TEX_UNIFIED_3D_F32_S32 + : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_UNIFIED_3D_F32_F32 + : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_3D_S32_S32 + : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_3D_S32_F32 + : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_3D_U32_S32 + : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_3D_U32_F32 + : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_3D_LEVEL_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, \\{$x, $y, $z, $z\\}], $lod;", + []>; +multiclass TEX_UNIFIED_3D_LEVEL { + def _R : TEX_UNIFIED_3D_LEVEL_base; + def _I : TEX_UNIFIED_3D_LEVEL_base; +} -def SULD_3D_I8_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I16_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I32_TRAP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I64_TRAP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; +defm TEX_UNIFIED_3D_F32_F32_LEVEL + : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_3D_S32_F32_LEVEL + : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_3D_U32_F32_LEVEL + : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_3D_GRAD_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$y, intype:$z, + intype:$gradx0, intype:$gradx1, + intype:$gradx2, intype:$grady0, + intype:$grady1, intype:$grady2)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}]," + " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," + " \\{$grady0, $grady1, $grady2, $grady2\\};", + []>; +multiclass TEX_UNIFIED_3D_GRAD { + def _R : TEX_UNIFIED_3D_GRAD_base; + def _I : TEX_UNIFIED_3D_GRAD_base; } -let IsSuld = 2 in { -def SULD_1D_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I64_TRAP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; +defm TEX_UNIFIED_3D_F32_F32_GRAD + : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_3D_S32_F32_GRAD + : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_3D_U32_F32_GRAD + : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_CUBE_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$y, intype:$z)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];", + []>; +multiclass TEX_UNIFIED_CUBE { + def _R : TEX_UNIFIED_CUBE_base; + def _I : TEX_UNIFIED_CUBE_base; +} -def SULD_1D_ARRAY_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I64_TRAP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; +defm TEX_UNIFIED_CUBE_F32_F32 + : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_CUBE_S32_F32 + : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_CUBE_U32_F32 + : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_CUBE_LEVEL_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, \\{$x, $y, $z, $z\\}], $lod;", + []>; +multiclass TEX_UNIFIED_CUBE_LEVEL { + def _R : TEX_UNIFIED_CUBE_LEVEL_base; + def _I : TEX_UNIFIED_CUBE_LEVEL_base; +} -def SULD_2D_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I64_TRAP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; +defm TEX_UNIFIED_CUBE_F32_F32_LEVEL + : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", + Float32Regs, Float32Regs>; +defm TEX_UNIFIED_CUBE_S32_F32_LEVEL + : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", + Int32Regs, Float32Regs>; +defm TEX_UNIFIED_CUBE_U32_F32_LEVEL + : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", + Int32Regs, Float32Regs>; + +class TEX_UNIFIED_CUBE_ARRAY_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];", + []>; +multiclass TEX_UNIFIED_CUBE_ARRAY { + def _R : TEX_UNIFIED_CUBE_ARRAY_base; + def _I : TEX_UNIFIED_CUBE_ARRAY_base; +} -def SULD_2D_ARRAY_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I64_TRAP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; +defm TEX_UNIFIED_CUBE_ARRAY_F32_F32 + : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_CUBE_ARRAY_S32_F32 + : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_CUBE_ARRAY_U32_F32 + : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z, + intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, \\{$l, $x, $y, $z\\}], $lod;", + []>; +multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL { + def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base; + def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base; +} -def SULD_3D_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I64_TRAP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; +defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL + : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32", + Float32Regs, Float32Regs>; +defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL + : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32", + Int32Regs, Float32Regs>; +defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL + : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32", + Int32Regs, Float32Regs>; + +class TLD4_UNIFIED_2D_base + : NVPTXInst<(outs outtype:$v0, outtype:$v1, + outtype:$v2, outtype:$v3), + !con(tex, (ins intype:$x, intype:$y)), + inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];", + []>; +multiclass TLD4_UNIFIED_2D { + def _R : TLD4_UNIFIED_2D_base; + def _I : TLD4_UNIFIED_2D_base; } -let IsSuld = 3 in { -def SULD_1D_V4I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V4I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V4I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; +defm TLD4_UNIFIED_R_2D_F32_F32 + : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TLD4_UNIFIED_G_2D_F32_F32 + : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TLD4_UNIFIED_B_2D_F32_F32 + : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TLD4_UNIFIED_A_2D_F32_F32 + : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>; + +defm TLD4_UNIFIED_R_2D_S32_F32 + : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TLD4_UNIFIED_G_2D_S32_F32 + : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TLD4_UNIFIED_B_2D_S32_F32 + : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TLD4_UNIFIED_A_2D_S32_F32 + : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>; + +defm TLD4_UNIFIED_R_2D_U32_F32 + : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>; +defm TLD4_UNIFIED_G_2D_U32_F32 + : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>; +defm TLD4_UNIFIED_B_2D_U32_F32 + : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>; +defm TLD4_UNIFIED_A_2D_U32_F32 + : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>; -def SULD_1D_ARRAY_V4I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V4I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V4I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; +} -def SULD_2D_V4I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V4I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V4I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_ARRAY_V4I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V4I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V4I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; +//=== Surface load instructions -def SULD_3D_V4I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V4I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V4I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; +let IsSuld = true in { + +class SULD_1D_base + : NVPTXInst<(outs outtype:$r), + !con(surf, (ins Int32Regs:$x)), + inst # " \\{$r\\}, [$s, \\{$x\\}];", + []>; +multiclass SULD_1D { + def _R : SULD_1D_base; + def _I : SULD_1D_base; } -// .zero variant -let IsSuld = true in { -def SULD_1D_I8_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I16_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I32_ZERO - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I64_ZERO - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];", - []>; +defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>; +defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>; +defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>; +defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>; + +defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>; +defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>; +defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>; +defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>; + +defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>; +defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>; +defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>; +defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>; + +class SULD_1D_ARRAY_base + : NVPTXInst<(outs outtype:$r), + !con(surf, (ins Int32Regs:$l, Int32Regs:$x)), + inst # " \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +multiclass SULD_1D_ARRAY { + def _R : SULD_1D_ARRAY_base; + def _I : SULD_1D_ARRAY_base; +} -def SULD_1D_ARRAY_I8_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I16_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I32_ZERO - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I64_ZERO - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; +defm SULD_1D_ARRAY_I8_CLAMP + : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>; +defm SULD_1D_ARRAY_I16_CLAMP + : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>; +defm SULD_1D_ARRAY_I32_CLAMP + : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>; +defm SULD_1D_ARRAY_I64_CLAMP + : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>; + +defm SULD_1D_ARRAY_I8_TRAP + : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>; +defm SULD_1D_ARRAY_I16_TRAP + : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>; +defm SULD_1D_ARRAY_I32_TRAP + : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>; +defm SULD_1D_ARRAY_I64_TRAP + : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>; + +defm SULD_1D_ARRAY_I8_ZERO + : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>; +defm SULD_1D_ARRAY_I16_ZERO + : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>; +defm SULD_1D_ARRAY_I32_ZERO + : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>; +defm SULD_1D_ARRAY_I64_ZERO + : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>; + +class SULD_2D_base + : NVPTXInst<(outs outtype:$r), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y)), + inst # " \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +multiclass SULD_2D { + def _R : SULD_2D_base; + def _I : SULD_2D_base; +} -def SULD_2D_I8_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I16_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I32_ZERO - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I64_ZERO - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; +defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>; +defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>; +defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>; +defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>; + +defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>; +defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>; +defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>; +defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>; + +defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>; +defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>; +defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>; +defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>; + +class SULD_2D_ARRAY_base + : NVPTXInst<(outs outtype:$r), + !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)), + inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +multiclass SULD_2D_ARRAY { + def _R : SULD_2D_ARRAY_base; + def _I : SULD_2D_ARRAY_base; +} -def SULD_2D_ARRAY_I8_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I16_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I32_ZERO - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I64_ZERO - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; +defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>; +defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>; +defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>; +defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>; + +defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>; +defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>; +defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>; +defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>; + +defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>; +defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>; +defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>; +defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>; + +class SULD_3D_base + : NVPTXInst<(outs outtype:$r), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)), + inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +multiclass SULD_3D { + def _R : SULD_3D_base; + def _I : SULD_3D_base; +} -def SULD_3D_I8_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I16_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I32_ZERO - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I64_ZERO - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; +defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>; +defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>; +defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>; +defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>; + +defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>; +defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>; +defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>; +defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>; + +defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>; +defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>; +defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>; +defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>; } let IsSuld = 2 in { -def SULD_1D_V2I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I64_ZERO - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_ARRAY_V2I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I64_ZERO - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; +class SULD_1D_V2_base + : NVPTXInst<(outs outtype:$r, outtype:$g), + !con(surf, (ins Int32Regs:$x)), + inst # " \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +multiclass SULD_1D_V2 { + def _R : SULD_1D_V2_base; + def _I : SULD_1D_V2_base; +} -def SULD_2D_V2I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I64_ZERO - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; +defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>; +defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>; +defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>; +defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>; + +defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>; +defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>; +defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>; +defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>; + +defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>; +defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>; +defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>; +defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>; + +class SULD_1D_ARRAY_V2_base + : NVPTXInst<(outs outtype:$r, outtype:$g), + !con(surf, (ins Int32Regs:$l, Int32Regs:$x)), + inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +multiclass SULD_1D_ARRAY_V2 { + def _R : SULD_1D_ARRAY_V2_base; + def _I : SULD_1D_ARRAY_V2_base; +} -def SULD_2D_ARRAY_V2I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I64_ZERO - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; +defm SULD_1D_ARRAY_V2I8_CLAMP + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>; +defm SULD_1D_ARRAY_V2I16_CLAMP + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>; +defm SULD_1D_ARRAY_V2I32_CLAMP + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>; +defm SULD_1D_ARRAY_V2I64_CLAMP + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>; + +defm SULD_1D_ARRAY_V2I8_TRAP + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>; +defm SULD_1D_ARRAY_V2I16_TRAP + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>; +defm SULD_1D_ARRAY_V2I32_TRAP + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>; +defm SULD_1D_ARRAY_V2I64_TRAP + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>; + +defm SULD_1D_ARRAY_V2I8_ZERO + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>; +defm SULD_1D_ARRAY_V2I16_ZERO + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>; +defm SULD_1D_ARRAY_V2I32_ZERO + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>; +defm SULD_1D_ARRAY_V2I64_ZERO + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>; + +class SULD_2D_V2_base + : NVPTXInst<(outs outtype:$r, outtype:$g), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y)), + inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +multiclass SULD_2D_V2 { + def _R : SULD_2D_V2_base; + def _I : SULD_2D_V2_base; +} + +defm SULD_2D_V2I8_CLAMP + : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>; +defm SULD_2D_V2I16_CLAMP + : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>; +defm SULD_2D_V2I32_CLAMP + : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>; +defm SULD_2D_V2I64_CLAMP + : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>; + +defm SULD_2D_V2I8_TRAP + : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>; +defm SULD_2D_V2I16_TRAP + : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>; +defm SULD_2D_V2I32_TRAP + : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>; +defm SULD_2D_V2I64_TRAP + : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>; + +defm SULD_2D_V2I8_ZERO + : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>; +defm SULD_2D_V2I16_ZERO + : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>; +defm SULD_2D_V2I32_ZERO + : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>; +defm SULD_2D_V2I64_ZERO + : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>; + +class SULD_2D_ARRAY_V2_base + : NVPTXInst<(outs outtype:$r, outtype:$g), + !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)), + inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +multiclass SULD_2D_ARRAY_V2 { + def _R : SULD_2D_ARRAY_V2_base; + def _I : SULD_2D_ARRAY_V2_base; +} + +defm SULD_2D_ARRAY_V2I8_CLAMP + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>; +defm SULD_2D_ARRAY_V2I16_CLAMP + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>; +defm SULD_2D_ARRAY_V2I32_CLAMP + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>; +defm SULD_2D_ARRAY_V2I64_CLAMP + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>; + +defm SULD_2D_ARRAY_V2I8_TRAP + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>; +defm SULD_2D_ARRAY_V2I16_TRAP + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>; +defm SULD_2D_ARRAY_V2I32_TRAP + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>; +defm SULD_2D_ARRAY_V2I64_TRAP + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>; + +defm SULD_2D_ARRAY_V2I8_ZERO + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>; +defm SULD_2D_ARRAY_V2I16_ZERO + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>; +defm SULD_2D_ARRAY_V2I32_ZERO + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>; +defm SULD_2D_ARRAY_V2I64_ZERO + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>; + +class SULD_3D_V2_base + : NVPTXInst<(outs outtype:$r, outtype:$g), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)), + inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +multiclass SULD_3D_V2 { + def _R : SULD_3D_V2_base; + def _I : SULD_3D_V2_base; +} + +defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>; +defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>; +defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>; +defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>; + +defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>; +defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>; +defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>; +defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>; + +defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>; +defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>; +defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>; +defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>; -def SULD_3D_V2I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I64_ZERO - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; } let IsSuld = 3 in { -def SULD_1D_V4I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V4I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V4I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_ARRAY_V4I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V4I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V4I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; +class SULD_1D_V4_base + : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), + !con(surf, (ins Int32Regs:$x)), + inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +multiclass SULD_1D_V4 { + def _R : SULD_1D_V4_base; + def _I : SULD_1D_V4_base; +} -def SULD_2D_V4I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V4I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V4I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; +defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>; +defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>; +defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>; + +defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>; +defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>; +defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>; + +defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>; +defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>; +defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>; + +class SULD_1D_ARRAY_V4_base + : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), + !con(surf, (ins Int32Regs:$l, Int32Regs:$x)), + inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];", + []>; +multiclass SULD_1D_ARRAY_V4 { + def _R : SULD_1D_ARRAY_V4_base; + def _I : SULD_1D_ARRAY_V4_base; +} -def SULD_2D_ARRAY_V4I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V4I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V4I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; +defm SULD_1D_ARRAY_V4I8_CLAMP + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>; +defm SULD_1D_ARRAY_V4I16_CLAMP + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>; +defm SULD_1D_ARRAY_V4I32_CLAMP + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>; + +defm SULD_1D_ARRAY_V4I8_TRAP + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>; +defm SULD_1D_ARRAY_V4I16_TRAP + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>; +defm SULD_1D_ARRAY_V4I32_TRAP + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>; + +defm SULD_1D_ARRAY_V4I8_ZERO + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>; +defm SULD_1D_ARRAY_V4I16_ZERO + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>; +defm SULD_1D_ARRAY_V4I32_ZERO + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>; + +class SULD_2D_V4_base + : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y)), + inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +multiclass SULD_2D_V4 { + def _R : SULD_2D_V4_base; + def _I : SULD_2D_V4_base; +} +defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>; +defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>; +defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>; + +defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>; +defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>; +defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>; + +defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>; +defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>; +defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>; + +class SULD_2D_ARRAY_V4_base + : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), + !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)), + inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +multiclass SULD_2D_ARRAY_V4 { + def _R : SULD_2D_ARRAY_V4_base; + def _I : SULD_2D_ARRAY_V4_base; +} + +defm SULD_2D_ARRAY_V4I8_CLAMP + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>; +defm SULD_2D_ARRAY_V4I16_CLAMP + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>; +defm SULD_2D_ARRAY_V4I32_CLAMP + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>; + +defm SULD_2D_ARRAY_V4I8_TRAP + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>; +defm SULD_2D_ARRAY_V4I16_TRAP + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>; +defm SULD_2D_ARRAY_V4I32_TRAP + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>; + +defm SULD_2D_ARRAY_V4I8_ZERO + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>; +defm SULD_2D_ARRAY_V4I16_ZERO + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>; +defm SULD_2D_ARRAY_V4I32_ZERO + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>; + +class SULD_3D_V4_base + : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)), + inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +multiclass SULD_3D_V4 { + def _R : SULD_3D_V4_base; + def _I : SULD_3D_V4_base; +} + +defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>; +defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>; +defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>; + +defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>; +defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>; +defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>; + +defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>; +defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>; +defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>; -def SULD_3D_V4I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V4I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V4I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; } //----------------------------------- @@ -4768,56 +4028,88 @@ //----------------------------------- let IsSurfTexQuery = true in { -def TXQ_CHANNEL_ORDER +def TXQ_CHANNEL_ORDER_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.channel_order.b32 \t$d, [$a];", []>; -def TXQ_CHANNEL_DATA_TYPE +def TXQ_CHANNEL_ORDER_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "txq.channel_order.b32 \t$d, [$a];", + []>; +def TXQ_CHANNEL_DATA_TYPE_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.channel_data_type.b32 \t$d, [$a];", []>; -def TXQ_WIDTH +def TXQ_CHANNEL_DATA_TYPE_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "txq.channel_data_type.b32 \t$d, [$a];", + []>; +def TXQ_WIDTH_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.width.b32 \t$d, [$a];", []>; -def TXQ_HEIGHT +def TXQ_WIDTH_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "txq.width.b32 \t$d, [$a];", + []>; +def TXQ_HEIGHT_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.height.b32 \t$d, [$a];", []>; -def TXQ_DEPTH +def TXQ_HEIGHT_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "txq.height.b32 \t$d, [$a];", + []>; +def TXQ_DEPTH_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.depth.b32 \t$d, [$a];", []>; -def TXQ_ARRAY_SIZE +def TXQ_DEPTH_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "txq.depth.b32 \t$d, [$a];", + []>; +def TXQ_ARRAY_SIZE_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.array_size.b32 \t$d, [$a];", []>; -def TXQ_NUM_SAMPLES +def TXQ_ARRAY_SIZE_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "txq.array_size.b32 \t$d, [$a];", + []>; +def TXQ_NUM_SAMPLES_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.num_samples.b32 \t$d, [$a];", []>; -def TXQ_NUM_MIPMAP_LEVELS +def TXQ_NUM_SAMPLES_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "txq.num_samples.b32 \t$d, [$a];", + []>; +def TXQ_NUM_MIPMAP_LEVELS_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.num_mipmap_levels.b32 \t$d, [$a];", []>; +def TXQ_NUM_MIPMAP_LEVELS_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "txq.num_mipmap_levels.b32 \t$d, [$a];", + []>; } def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a), - (TXQ_CHANNEL_ORDER Int64Regs:$a)>; + (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>; def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a), - (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>; + (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>; def : Pat<(int_nvvm_txq_width Int64Regs:$a), - (TXQ_WIDTH Int64Regs:$a)>; + (TXQ_WIDTH_R Int64Regs:$a)>; def : Pat<(int_nvvm_txq_height Int64Regs:$a), - (TXQ_HEIGHT Int64Regs:$a)>; + (TXQ_HEIGHT_R Int64Regs:$a)>; def : Pat<(int_nvvm_txq_depth Int64Regs:$a), - (TXQ_DEPTH Int64Regs:$a)>; + (TXQ_DEPTH_R Int64Regs:$a)>; def : Pat<(int_nvvm_txq_array_size Int64Regs:$a), - (TXQ_ARRAY_SIZE Int64Regs:$a)>; + (TXQ_ARRAY_SIZE_R Int64Regs:$a)>; def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a), - (TXQ_NUM_SAMPLES Int64Regs:$a)>; + (TXQ_NUM_SAMPLES_R Int64Regs:$a)>; def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a), - (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>; + (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>; //----------------------------------- @@ -4825,44 +4117,68 @@ //----------------------------------- let IsSurfTexQuery = true in { -def SUQ_CHANNEL_ORDER +def SUQ_CHANNEL_ORDER_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "suq.channel_order.b32 \t$d, [$a];", []>; -def SUQ_CHANNEL_DATA_TYPE +def SUQ_CHANNEL_ORDER_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "suq.channel_order.b32 \t$d, [$a];", + []>; +def SUQ_CHANNEL_DATA_TYPE_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "suq.channel_data_type.b32 \t$d, [$a];", []>; -def SUQ_WIDTH +def SUQ_CHANNEL_DATA_TYPE_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "suq.channel_data_type.b32 \t$d, [$a];", + []>; +def SUQ_WIDTH_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "suq.width.b32 \t$d, [$a];", []>; -def SUQ_HEIGHT +def SUQ_WIDTH_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "suq.width.b32 \t$d, [$a];", + []>; +def SUQ_HEIGHT_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "suq.height.b32 \t$d, [$a];", []>; -def SUQ_DEPTH +def SUQ_HEIGHT_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "suq.height.b32 \t$d, [$a];", + []>; +def SUQ_DEPTH_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "suq.depth.b32 \t$d, [$a];", []>; -def SUQ_ARRAY_SIZE +def SUQ_DEPTH_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "suq.depth.b32 \t$d, [$a];", + []>; +def SUQ_ARRAY_SIZE_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "suq.array_size.b32 \t$d, [$a];", []>; +def SUQ_ARRAY_SIZE_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "suq.array_size.b32 \t$d, [$a];", + []>; } def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a), - (SUQ_CHANNEL_ORDER Int64Regs:$a)>; + (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>; def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a), - (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>; + (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>; def : Pat<(int_nvvm_suq_width Int64Regs:$a), - (SUQ_WIDTH Int64Regs:$a)>; + (SUQ_WIDTH_R Int64Regs:$a)>; def : Pat<(int_nvvm_suq_height Int64Regs:$a), - (SUQ_HEIGHT Int64Regs:$a)>; + (SUQ_HEIGHT_R Int64Regs:$a)>; def : Pat<(int_nvvm_suq_depth Int64Regs:$a), - (SUQ_DEPTH Int64Regs:$a)>; + (SUQ_DEPTH_R Int64Regs:$a)>; def : Pat<(int_nvvm_suq_array_size Int64Regs:$a), - (SUQ_ARRAY_SIZE Int64Regs:$a)>; + (SUQ_ARRAY_SIZE_R Int64Regs:$a)>; //===- Handle Query -------------------------------------------------------===// @@ -4884,1329 +4200,522 @@ //===- Surface Stores -----------------------------------------------------===// let IsSust = true in { -// Unformatted -// .clamp variant -def SUST_B_1D_B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), - "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), - "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_V2B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V4B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, - Int16Regs:$b, Int16Regs:$a), - "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_V4B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, - Int16Regs:$b, Int16Regs:$a), - "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_V4B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; - - -def SUST_B_1D_ARRAY_B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), - "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), - "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_V2B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g), - "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g), - "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, - Int32Regs:$g), - "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, - Int64Regs:$g), - "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V4B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_ARRAY_V4B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_ARRAY_V4B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, - Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; - - -def SUST_B_2D_B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_V2B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g), - "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g), - "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, - Int32Regs:$g), - "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, - Int64Regs:$g), - "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V4B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_V4B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_V4B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, - Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; - - -def SUST_B_2D_ARRAY_B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r), - "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r), - "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r), - "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int64Regs:$r), - "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_V2B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g), - "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g), - "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r, Int32Regs:$g), - "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int64Regs:$r, Int64Regs:$g), - "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V4B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_ARRAY_V4B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_ARRAY_V4B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; - - -def SUST_B_3D_B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r), - "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r), - "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r), - "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int64Regs:$r), - "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_V2B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g), - "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g), - "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r, Int32Regs:$g), - "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int64Regs:$r, Int64Regs:$g), - "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V4B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_3D_V4B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_3D_V4B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; - - -// .trap variant -def SUST_B_1D_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), - "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), - "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, - Int16Regs:$b, Int16Regs:$a), - "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, - Int16Regs:$b, Int16Regs:$a), - "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; - - -def SUST_B_1D_ARRAY_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), - "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), - "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g), - "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g), - "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, - Int32Regs:$g), - "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, - Int64Regs:$g), - "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_ARRAY_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_ARRAY_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, - Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; - - -def SUST_B_2D_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g), - "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g), - "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, - Int32Regs:$g), - "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, - Int64Regs:$g), - "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, - Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; - - -def SUST_B_2D_ARRAY_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r), - "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r), - "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r), - "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int64Regs:$r), - "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g), - "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g), - "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r, Int32Regs:$g), - "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int64Regs:$r, Int64Regs:$g), - "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_ARRAY_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_ARRAY_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; - - -def SUST_B_3D_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r), - "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r), - "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r), - "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int64Regs:$r), - "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g), - "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g), - "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r, Int32Regs:$g), - "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int64Regs:$r, Int64Regs:$g), - "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_3D_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_3D_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; - - -// .zero variant -def SUST_B_1D_B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), - "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), - "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_V2B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V4B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, - Int16Regs:$b, Int16Regs:$a), - "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_V4B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, - Int16Regs:$b, Int16Regs:$a), - "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_V4B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; +class SUST_1D_base + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, intype:$r)), + inst # " \t[$s, \\{$x\\}], \\{$r\\};", + []>; +multiclass SUST_1D { + def _R : SUST_1D_base; + def _I : SUST_1D_base; +} -def SUST_B_1D_ARRAY_B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), - "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), - "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_V2B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g), - "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g), - "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, - Int32Regs:$g), - "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, - Int64Regs:$g), - "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V4B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_ARRAY_V4B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_ARRAY_V4B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, - Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; +defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>; +defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>; +defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>; +defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>; + +defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>; +defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>; +defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>; +defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>; + +defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>; +defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>; +defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>; +defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>; + +defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>; +defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>; +defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>; + +class SUST_1D_V2_base + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)), + inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +multiclass SUST_1D_V2 { + def _R : SUST_1D_V2_base; + def _I : SUST_1D_V2_base; +} +defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>; +defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>; +defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>; +defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>; + +defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>; +defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>; +defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>; +defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>; + +defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>; +defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>; +defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>; +defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>; + +defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>; +defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>; +defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>; + +class SUST_1D_V4_base + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g, + intype:$b, intype:$a)), + inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +multiclass SUST_1D_V4 { + def _R : SUST_1D_V4_base; + def _I : SUST_1D_V4_base; +} -def SUST_B_2D_B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_V2B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g), - "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g), - "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, - Int32Regs:$g), - "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, - Int64Regs:$g), - "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V4B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_V4B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_V4B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, - Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; +defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>; +defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>; +defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>; + +defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>; +defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>; +defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>; + +defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>; +defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>; +defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>; + +defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>; +defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>; +defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>; + +class SUST_1D_ARRAY_base + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)), + inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +multiclass SUST_1D_ARRAY { + def _R : SUST_1D_ARRAY_base; + def _I : SUST_1D_ARRAY_base; +} +defm SUST_B_1D_ARRAY_B8_CLAMP + : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>; +defm SUST_B_1D_ARRAY_B16_CLAMP + : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>; +defm SUST_B_1D_ARRAY_B32_CLAMP + : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>; +defm SUST_B_1D_ARRAY_B64_CLAMP + : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>; + +defm SUST_B_1D_ARRAY_B8_TRAP + : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>; +defm SUST_B_1D_ARRAY_B16_TRAP + : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>; +defm SUST_B_1D_ARRAY_B32_TRAP + : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>; +defm SUST_B_1D_ARRAY_B64_TRAP + : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>; + +defm SUST_B_1D_ARRAY_B8_ZERO + : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>; +defm SUST_B_1D_ARRAY_B16_ZERO + : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>; +defm SUST_B_1D_ARRAY_B32_ZERO + : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>; +defm SUST_B_1D_ARRAY_B64_ZERO + : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>; + +defm SUST_P_1D_ARRAY_B8_TRAP + : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>; +defm SUST_P_1D_ARRAY_B16_TRAP + : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>; +defm SUST_P_1D_ARRAY_B32_TRAP + : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>; + +class SUST_1D_ARRAY_V2_base + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, + intype:$r, intype:$g)), + inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +multiclass SUST_1D_ARRAY_V2 { + def _R : SUST_1D_ARRAY_V2_base; + def _I : SUST_1D_ARRAY_V2_base; +} -def SUST_B_2D_ARRAY_B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r), - "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r), - "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r), - "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int64Regs:$r), - "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_V2B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g), - "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g), - "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r, Int32Regs:$g), - "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int64Regs:$r, Int64Regs:$g), - "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V4B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_ARRAY_V4B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_ARRAY_V4B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; +defm SUST_B_1D_ARRAY_V2B8_CLAMP + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>; +defm SUST_B_1D_ARRAY_V2B16_CLAMP + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>; +defm SUST_B_1D_ARRAY_V2B32_CLAMP + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>; +defm SUST_B_1D_ARRAY_V2B64_CLAMP + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>; + +defm SUST_B_1D_ARRAY_V2B8_TRAP + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>; +defm SUST_B_1D_ARRAY_V2B16_TRAP + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>; +defm SUST_B_1D_ARRAY_V2B32_TRAP + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>; +defm SUST_B_1D_ARRAY_V2B64_TRAP + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>; + +defm SUST_B_1D_ARRAY_V2B8_ZERO + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>; +defm SUST_B_1D_ARRAY_V2B16_ZERO + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>; +defm SUST_B_1D_ARRAY_V2B32_ZERO + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>; +defm SUST_B_1D_ARRAY_V2B64_ZERO + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>; + +defm SUST_P_1D_ARRAY_V2B8_TRAP + : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>; +defm SUST_P_1D_ARRAY_V2B16_TRAP + : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>; +defm SUST_P_1D_ARRAY_V2B32_TRAP + : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>; + +class SUST_1D_ARRAY_V4_base + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, + intype:$r, intype:$g, intype:$b, intype:$a)), + inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};", + []>; +multiclass SUST_1D_ARRAY_V4 { + def _R : SUST_1D_ARRAY_V4_base; + def _I : SUST_1D_ARRAY_V4_base; +} +defm SUST_B_1D_ARRAY_V4B8_CLAMP + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>; +defm SUST_B_1D_ARRAY_V4B16_CLAMP + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>; +defm SUST_B_1D_ARRAY_V4B32_CLAMP + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>; + +defm SUST_B_1D_ARRAY_V4B8_TRAP + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>; +defm SUST_B_1D_ARRAY_V4B16_TRAP + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>; +defm SUST_B_1D_ARRAY_V4B32_TRAP + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>; + +defm SUST_B_1D_ARRAY_V4B8_ZERO + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>; +defm SUST_B_1D_ARRAY_V4B16_ZERO + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>; +defm SUST_B_1D_ARRAY_V4B32_ZERO + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>; + +defm SUST_P_1D_ARRAY_V4B8_TRAP + : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>; +defm SUST_P_1D_ARRAY_V4B16_TRAP + : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>; +defm SUST_P_1D_ARRAY_V4B32_TRAP + : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>; + +class SUST_2D_base + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)), + inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +multiclass SUST_2D { + def _R : SUST_2D_base; + def _I : SUST_2D_base; +} -def SUST_B_3D_B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r), - "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r), - "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r), - "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int64Regs:$r), - "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_V2B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g), - "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g), - "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r, Int32Regs:$g), - "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int64Regs:$r, Int64Regs:$g), - "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V4B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_3D_V4B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_3D_V4B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; +defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>; +defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>; +defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>; +defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>; + +defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>; +defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>; +defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>; +defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>; + +defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>; +defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>; +defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>; +defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>; + +defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>; +defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>; +defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>; + +class SUST_2D_V2_base + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, + intype:$r, intype:$g)), + inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +multiclass SUST_2D_V2 { + def _R : SUST_2D_V2_base; + def _I : SUST_2D_V2_base; +} +defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>; +defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>; +defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>; +defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>; + +defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>; +defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>; +defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>; +defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>; + +defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>; +defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>; +defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>; +defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>; + +defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>; +defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>; +defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>; + +class SUST_2D_V4_base + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, + intype:$r, intype:$g, intype:$b, intype:$a)), + inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};", + []>; +multiclass SUST_2D_V4 { + def _R : SUST_2D_V4_base; + def _I : SUST_2D_V4_base; +} +defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>; +defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>; +defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>; + +defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>; +defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>; +defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>; + +defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>; +defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>; +defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>; + +defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>; +defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>; +defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>; + +class SUST_2D_ARRAY_base + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + intype:$r)), + inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +multiclass SUST_2D_ARRAY { + def _R : SUST_2D_ARRAY_base; + def _I : SUST_2D_ARRAY_base; +} -// Formatted +defm SUST_B_2D_ARRAY_B8_CLAMP + : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>; +defm SUST_B_2D_ARRAY_B16_CLAMP + : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>; +defm SUST_B_2D_ARRAY_B32_CLAMP + : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>; +defm SUST_B_2D_ARRAY_B64_CLAMP + : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>; + +defm SUST_B_2D_ARRAY_B8_TRAP + : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>; +defm SUST_B_2D_ARRAY_B16_TRAP + : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>; +defm SUST_B_2D_ARRAY_B32_TRAP + : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>; +defm SUST_B_2D_ARRAY_B64_TRAP + : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>; + +defm SUST_B_2D_ARRAY_B8_ZERO + : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>; +defm SUST_B_2D_ARRAY_B16_ZERO + : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>; +defm SUST_B_2D_ARRAY_B32_ZERO + : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>; +defm SUST_B_2D_ARRAY_B64_ZERO + : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>; + +defm SUST_P_2D_ARRAY_B8_TRAP + : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>; +defm SUST_P_2D_ARRAY_B16_TRAP + : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>; +defm SUST_P_2D_ARRAY_B32_TRAP + : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>; + +class SUST_2D_ARRAY_V2_base + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + intype:$r, intype:$g)), + inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};", + []>; +multiclass SUST_2D_ARRAY_V2 { + def _R : SUST_2D_ARRAY_V2_base; + def _I : SUST_2D_ARRAY_V2_base; +} -def SUST_P_1D_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_P_1D_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_P_1D_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), - "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_P_1D_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_P_1D_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_P_1D_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_P_1D_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, - Int16Regs:$b, Int16Regs:$a), - "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; -def SUST_P_1D_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, - Int16Regs:$b, Int16Regs:$a), - "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; -def SUST_P_1D_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; +defm SUST_B_2D_ARRAY_V2B8_CLAMP + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>; +defm SUST_B_2D_ARRAY_V2B16_CLAMP + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>; +defm SUST_B_2D_ARRAY_V2B32_CLAMP + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>; +defm SUST_B_2D_ARRAY_V2B64_CLAMP + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>; + +defm SUST_B_2D_ARRAY_V2B8_TRAP + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>; +defm SUST_B_2D_ARRAY_V2B16_TRAP + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>; +defm SUST_B_2D_ARRAY_V2B32_TRAP + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>; +defm SUST_B_2D_ARRAY_V2B64_TRAP + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>; + +defm SUST_B_2D_ARRAY_V2B8_ZERO + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>; +defm SUST_B_2D_ARRAY_V2B16_ZERO + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>; +defm SUST_B_2D_ARRAY_V2B32_ZERO + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>; +defm SUST_B_2D_ARRAY_V2B64_ZERO + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>; + +defm SUST_P_2D_ARRAY_V2B8_TRAP + : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>; +defm SUST_P_2D_ARRAY_V2B16_TRAP + : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>; +defm SUST_P_2D_ARRAY_V2B32_TRAP + : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>; + +class SUST_2D_ARRAY_V4_base + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + intype:$r, intype:$g, intype:$b, intype:$a)), + inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};", + []>; +multiclass SUST_2D_ARRAY_V4 { + def _R : SUST_2D_ARRAY_V4_base; + def _I : SUST_2D_ARRAY_V4_base; +} +defm SUST_B_2D_ARRAY_V4B8_CLAMP + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>; +defm SUST_B_2D_ARRAY_V4B16_CLAMP + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>; +defm SUST_B_2D_ARRAY_V4B32_CLAMP + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>; + +defm SUST_B_2D_ARRAY_V4B8_TRAP + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>; +defm SUST_B_2D_ARRAY_V4B16_TRAP + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>; +defm SUST_B_2D_ARRAY_V4B32_TRAP + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>; + +defm SUST_B_2D_ARRAY_V4B8_ZERO + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>; +defm SUST_B_2D_ARRAY_V4B16_ZERO + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>; +defm SUST_B_2D_ARRAY_V4B32_ZERO + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>; + +defm SUST_P_2D_ARRAY_V4B8_TRAP + : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>; +defm SUST_P_2D_ARRAY_V4B16_TRAP + : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>; +defm SUST_P_2D_ARRAY_V4B32_TRAP + : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>; + +class SUST_3D_base + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + intype:$r)), + inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +multiclass SUST_3D { + def _R : SUST_3D_base; + def _I : SUST_3D_base; +} -def SUST_P_1D_ARRAY_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_P_1D_ARRAY_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_P_1D_ARRAY_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), - "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_P_1D_ARRAY_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g), - "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_P_1D_ARRAY_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g), - "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_P_1D_ARRAY_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, - Int32Regs:$g), - "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_P_1D_ARRAY_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_P_1D_ARRAY_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_P_1D_ARRAY_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, - Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; +defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>; +defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>; +defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>; +defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>; + +defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>; +defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>; +defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>; +defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>; + +defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>; +defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>; +defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>; +defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>; + +defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>; +defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>; +defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>; + +class SUST_3D_V2_base + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + intype:$r, intype:$g)), + inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};", + []>; +multiclass SUST_3D_V2 { + def _R : SUST_3D_V2_base; + def _I : SUST_3D_V2_base; +} +defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>; +defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>; +defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>; +defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>; + +defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>; +defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>; +defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>; +defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>; + +defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>; +defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>; +defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>; +defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>; + +defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>; +defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>; +defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>; + +class SUST_3D_V4_base + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + intype:$r, intype:$g, intype:$b, intype:$a)), + inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};", + []>; +multiclass SUST_3D_V4 { + def _R : SUST_3D_V4_base; + def _I : SUST_3D_V4_base; +} -def SUST_P_2D_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_P_2D_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_P_2D_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_P_2D_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g), - "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_P_2D_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g), - "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_P_2D_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, - Int32Regs:$g), - "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_P_2D_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_P_2D_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_P_2D_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, - Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; +defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>; +defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>; +defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>; +defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>; +defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>; +defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>; -def SUST_P_2D_ARRAY_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r), - "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_P_2D_ARRAY_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r), - "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_P_2D_ARRAY_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r), - "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_P_2D_ARRAY_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g), - "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_P_2D_ARRAY_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g), - "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_P_2D_ARRAY_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r, Int32Regs:$g), - "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_P_2D_ARRAY_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_P_2D_ARRAY_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_P_2D_ARRAY_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; +defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>; +defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>; +defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>; +defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>; +defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>; +defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>; -def SUST_P_3D_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r), - "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_P_3D_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r), - "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_P_3D_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r), - "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_P_3D_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g), - "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_P_3D_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g), - "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_P_3D_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r, Int32Regs:$g), - "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_P_3D_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_P_3D_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_P_3D_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; } // Surface store instruction patterns @@ -6216,248 +4725,248 @@ // .clamp variant def : Pat<(int_nvvm_sust_b_1d_i8_clamp Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i16_clamp Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), - (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i64_clamp Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), - (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; + (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), - (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), - (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_i8_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i16_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i64_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), - (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s, + (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s, + (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s, + (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s, + (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), - (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s, + (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s, + (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; @@ -6466,77 +4975,77 @@ def : Pat<(int_nvvm_sust_b_3d_i8_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - (SUST_B_3D_B8_CLAMP Int64Regs:$s, + (SUST_B_3D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i16_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - (SUST_B_3D_B16_CLAMP Int64Regs:$s, + (SUST_B_3D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r), - (SUST_B_3D_B32_CLAMP Int64Regs:$s, + (SUST_B_3D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i64_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r), - (SUST_B_3D_B64_CLAMP Int64Regs:$s, + (SUST_B_3D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - (SUST_B_3D_V2B8_CLAMP Int64Regs:$s, + (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - (SUST_B_3D_V2B16_CLAMP Int64Regs:$s, + (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g), - (SUST_B_3D_V2B32_CLAMP Int64Regs:$s, + (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r, Int64Regs:$g), - (SUST_B_3D_V2B64_CLAMP Int64Regs:$s, + (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_3D_V4B8_CLAMP Int64Regs:$s, + (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_3D_V4B16_CLAMP Int64Regs:$s, + (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_3D_V4B32_CLAMP Int64Regs:$s, + (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; @@ -6544,248 +5053,248 @@ // .trap variant def : Pat<(int_nvvm_sust_b_1d_i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i16_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), - (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i64_trap Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), - (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; + (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i16_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i64_trap Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v4i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_v4i16_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_v4i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), - (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i64_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), - (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i64_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i64_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), - (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v4i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_v4i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_v4i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s, + (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s, + (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s, + (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i64_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s, + (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), - (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s, + (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s, + (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; @@ -6794,77 +5303,77 @@ def : Pat<(int_nvvm_sust_b_3d_i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - (SUST_B_3D_B8_TRAP Int64Regs:$s, + (SUST_B_3D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - (SUST_B_3D_B16_TRAP Int64Regs:$s, + (SUST_B_3D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r), - (SUST_B_3D_B32_TRAP Int64Regs:$s, + (SUST_B_3D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i64_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r), - (SUST_B_3D_B64_TRAP Int64Regs:$s, + (SUST_B_3D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - (SUST_B_3D_V2B8_TRAP Int64Regs:$s, + (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - (SUST_B_3D_V2B16_TRAP Int64Regs:$s, + (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g), - (SUST_B_3D_V2B32_TRAP Int64Regs:$s, + (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i64_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r, Int64Regs:$g), - (SUST_B_3D_V2B64_TRAP Int64Regs:$s, + (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v4i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_3D_V4B8_TRAP Int64Regs:$s, + (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_3d_v4i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_3D_V4B16_TRAP Int64Regs:$s, + (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_3d_v4i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_3D_V4B32_TRAP Int64Regs:$s, + (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; @@ -6872,248 +5381,248 @@ // .zero variant def : Pat<(int_nvvm_sust_b_1d_i8_zero Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i16_zero Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), - (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i64_zero Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), - (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; + (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_v2i8_zero Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i16_zero Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i64_zero Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v4i8_zero Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_v4i16_zero Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_v4i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_i8_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i16_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i32_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), - (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i64_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), - (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_i8_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i16_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i64_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_v2i8_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i16_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i64_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), - (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v4i8_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_v4i16_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_v4i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_i8_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s, + (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i16_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s, + (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i32_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s, + (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i64_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s, + (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), - (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s, + (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s, + (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; @@ -7122,77 +5631,77 @@ def : Pat<(int_nvvm_sust_b_3d_i8_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - (SUST_B_3D_B8_ZERO Int64Regs:$s, + (SUST_B_3D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i16_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - (SUST_B_3D_B16_ZERO Int64Regs:$s, + (SUST_B_3D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r), - (SUST_B_3D_B32_ZERO Int64Regs:$s, + (SUST_B_3D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i64_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r), - (SUST_B_3D_B64_ZERO Int64Regs:$s, + (SUST_B_3D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_v2i8_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - (SUST_B_3D_V2B8_ZERO Int64Regs:$s, + (SUST_B_3D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i16_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - (SUST_B_3D_V2B16_ZERO Int64Regs:$s, + (SUST_B_3D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g), - (SUST_B_3D_V2B32_ZERO Int64Regs:$s, + (SUST_B_3D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i64_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r, Int64Regs:$g), - (SUST_B_3D_V2B64_ZERO Int64Regs:$s, + (SUST_B_3D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v4i8_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_3D_V4B8_ZERO Int64Regs:$s, + (SUST_B_3D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_3d_v4i16_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_3D_V4B16_ZERO Int64Regs:$s, + (SUST_B_3D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_3d_v4i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_3D_V4B32_ZERO Int64Regs:$s, + (SUST_B_3D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; @@ -7201,207 +5710,207 @@ def : Pat<(int_nvvm_sust_p_1d_i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + (SUST_P_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_1d_i16_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + (SUST_P_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_1d_i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), - (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + (SUST_P_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_p_1d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_P_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_1d_v2i16_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_P_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_1d_v2i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_P_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_p_1d_v4i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_P_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_1d_v4i16_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_P_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_1d_v4i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_P_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_p_1d_array_i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), - (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_1d_array_i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), - (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_1d_array_i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), - (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_p_2d_i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_2d_i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_2d_i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_p_2d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_2d_v2i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_2d_v2i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_p_2d_v4i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_2d_v4i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_2d_v4i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_p_2d_array_i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s, + (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_2d_array_i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s, + (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_2d_array_i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s, + (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s, + (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s, + (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; @@ -7410,63 +5919,63 @@ def : Pat<(int_nvvm_sust_p_3d_i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - (SUST_P_3D_B8_TRAP Int64Regs:$s, + (SUST_P_3D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_3d_i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - (SUST_P_3D_B16_TRAP Int64Regs:$s, + (SUST_P_3D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_3d_i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r), - (SUST_P_3D_B32_TRAP Int64Regs:$s, + (SUST_P_3D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_p_3d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - (SUST_P_3D_V2B8_TRAP Int64Regs:$s, + (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_3d_v2i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - (SUST_P_3D_V2B16_TRAP Int64Regs:$s, + (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_3d_v2i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g), - (SUST_P_3D_V2B32_TRAP Int64Regs:$s, + (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_p_3d_v4i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_3D_V4B8_TRAP Int64Regs:$s, + (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_3d_v4i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_3D_V4B16_TRAP Int64Regs:$s, + (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_3d_v4i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_P_3D_V4B32_TRAP Int64Regs:$s, + (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp --- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -41,7 +41,7 @@ } private: bool processInstr(MachineInstr &MI); - void replaceImageHandle(MachineOperand &Op, MachineFunction &MF); + bool replaceImageHandle(MachineOperand &Op, MachineFunction &MF); bool findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx); }; @@ -76,19 +76,1675 @@ return Changed; } +static unsigned suldRegisterToIndexOpcode(unsigned RegOC) { + switch (RegOC) { + case NVPTX::SULD_1D_I8_CLAMP_R: + return NVPTX::SULD_1D_I8_CLAMP_I; + case NVPTX::SULD_1D_I16_CLAMP_R: + return NVPTX::SULD_1D_I16_CLAMP_I; + case NVPTX::SULD_1D_I32_CLAMP_R: + return NVPTX::SULD_1D_I32_CLAMP_I; + case NVPTX::SULD_1D_I64_CLAMP_R: + return NVPTX::SULD_1D_I64_CLAMP_I; + case NVPTX::SULD_1D_ARRAY_I8_CLAMP_R: + return NVPTX::SULD_1D_ARRAY_I8_CLAMP_I; + case NVPTX::SULD_1D_ARRAY_I16_CLAMP_R: + return NVPTX::SULD_1D_ARRAY_I16_CLAMP_I; + case NVPTX::SULD_1D_ARRAY_I32_CLAMP_R: + return NVPTX::SULD_1D_ARRAY_I32_CLAMP_I; + case NVPTX::SULD_1D_ARRAY_I64_CLAMP_R: + return NVPTX::SULD_1D_ARRAY_I64_CLAMP_I; + case NVPTX::SULD_2D_I8_CLAMP_R: + return NVPTX::SULD_2D_I8_CLAMP_I; + case NVPTX::SULD_2D_I16_CLAMP_R: + return NVPTX::SULD_2D_I16_CLAMP_I; + case NVPTX::SULD_2D_I32_CLAMP_R: + return NVPTX::SULD_2D_I32_CLAMP_I; + case NVPTX::SULD_2D_I64_CLAMP_R: + return NVPTX::SULD_2D_I64_CLAMP_I; + case NVPTX::SULD_2D_ARRAY_I8_CLAMP_R: + return NVPTX::SULD_2D_ARRAY_I8_CLAMP_I; + case NVPTX::SULD_2D_ARRAY_I16_CLAMP_R: + return NVPTX::SULD_2D_ARRAY_I16_CLAMP_I; + case NVPTX::SULD_2D_ARRAY_I32_CLAMP_R: + return NVPTX::SULD_2D_ARRAY_I32_CLAMP_I; + case NVPTX::SULD_2D_ARRAY_I64_CLAMP_R: + return NVPTX::SULD_2D_ARRAY_I64_CLAMP_I; + case NVPTX::SULD_3D_I8_CLAMP_R: + return NVPTX::SULD_3D_I8_CLAMP_I; + case NVPTX::SULD_3D_I16_CLAMP_R: + return NVPTX::SULD_3D_I16_CLAMP_I; + case NVPTX::SULD_3D_I32_CLAMP_R: + return NVPTX::SULD_3D_I32_CLAMP_I; + case NVPTX::SULD_3D_I64_CLAMP_R: + return NVPTX::SULD_3D_I64_CLAMP_I; + case NVPTX::SULD_1D_V2I8_CLAMP_R: + return NVPTX::SULD_1D_V2I8_CLAMP_I; + case NVPTX::SULD_1D_V2I16_CLAMP_R: + return NVPTX::SULD_1D_V2I16_CLAMP_I; + case NVPTX::SULD_1D_V2I32_CLAMP_R: + return NVPTX::SULD_1D_V2I32_CLAMP_I; + case NVPTX::SULD_1D_V2I64_CLAMP_R: + return NVPTX::SULD_1D_V2I64_CLAMP_I; + case NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R: + return NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_I; + case NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R: + return NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_I; + case NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R: + return NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_I; + case NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R: + return NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_I; + case NVPTX::SULD_2D_V2I8_CLAMP_R: + return NVPTX::SULD_2D_V2I8_CLAMP_I; + case NVPTX::SULD_2D_V2I16_CLAMP_R: + return NVPTX::SULD_2D_V2I16_CLAMP_I; + case NVPTX::SULD_2D_V2I32_CLAMP_R: + return NVPTX::SULD_2D_V2I32_CLAMP_I; + case NVPTX::SULD_2D_V2I64_CLAMP_R: + return NVPTX::SULD_2D_V2I64_CLAMP_I; + case NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R: + return NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_I; + case NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R: + return NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_I; + case NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R: + return NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_I; + case NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R: + return NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_I; + case NVPTX::SULD_3D_V2I8_CLAMP_R: + return NVPTX::SULD_3D_V2I8_CLAMP_I; + case NVPTX::SULD_3D_V2I16_CLAMP_R: + return NVPTX::SULD_3D_V2I16_CLAMP_I; + case NVPTX::SULD_3D_V2I32_CLAMP_R: + return NVPTX::SULD_3D_V2I32_CLAMP_I; + case NVPTX::SULD_3D_V2I64_CLAMP_R: + return NVPTX::SULD_3D_V2I64_CLAMP_I; + case NVPTX::SULD_1D_V4I8_CLAMP_R: + return NVPTX::SULD_1D_V4I8_CLAMP_I; + case NVPTX::SULD_1D_V4I16_CLAMP_R: + return NVPTX::SULD_1D_V4I16_CLAMP_I; + case NVPTX::SULD_1D_V4I32_CLAMP_R: + return NVPTX::SULD_1D_V4I32_CLAMP_I; + case NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R: + return NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_I; + case NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R: + return NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_I; + case NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R: + return NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_I; + case NVPTX::SULD_2D_V4I8_CLAMP_R: + return NVPTX::SULD_2D_V4I8_CLAMP_I; + case NVPTX::SULD_2D_V4I16_CLAMP_R: + return NVPTX::SULD_2D_V4I16_CLAMP_I; + case NVPTX::SULD_2D_V4I32_CLAMP_R: + return NVPTX::SULD_2D_V4I32_CLAMP_I; + case NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R: + return NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_I; + case NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R: + return NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_I; + case NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R: + return NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_I; + case NVPTX::SULD_3D_V4I8_CLAMP_R: + return NVPTX::SULD_3D_V4I8_CLAMP_I; + case NVPTX::SULD_3D_V4I16_CLAMP_R: + return NVPTX::SULD_3D_V4I16_CLAMP_I; + case NVPTX::SULD_3D_V4I32_CLAMP_R: + return NVPTX::SULD_3D_V4I32_CLAMP_I; + case NVPTX::SULD_1D_I8_TRAP_R: + return NVPTX::SULD_1D_I8_TRAP_I; + case NVPTX::SULD_1D_I16_TRAP_R: + return NVPTX::SULD_1D_I16_TRAP_I; + case NVPTX::SULD_1D_I32_TRAP_R: + return NVPTX::SULD_1D_I32_TRAP_I; + case NVPTX::SULD_1D_I64_TRAP_R: + return NVPTX::SULD_1D_I64_TRAP_I; + case NVPTX::SULD_1D_ARRAY_I8_TRAP_R: + return NVPTX::SULD_1D_ARRAY_I8_TRAP_I; + case NVPTX::SULD_1D_ARRAY_I16_TRAP_R: + return NVPTX::SULD_1D_ARRAY_I16_TRAP_I; + case NVPTX::SULD_1D_ARRAY_I32_TRAP_R: + return NVPTX::SULD_1D_ARRAY_I32_TRAP_I; + case NVPTX::SULD_1D_ARRAY_I64_TRAP_R: + return NVPTX::SULD_1D_ARRAY_I64_TRAP_I; + case NVPTX::SULD_2D_I8_TRAP_R: + return NVPTX::SULD_2D_I8_TRAP_I; + case NVPTX::SULD_2D_I16_TRAP_R: + return NVPTX::SULD_2D_I16_TRAP_I; + case NVPTX::SULD_2D_I32_TRAP_R: + return NVPTX::SULD_2D_I32_TRAP_I; + case NVPTX::SULD_2D_I64_TRAP_R: + return NVPTX::SULD_2D_I64_TRAP_I; + case NVPTX::SULD_2D_ARRAY_I8_TRAP_R: + return NVPTX::SULD_2D_ARRAY_I8_TRAP_I; + case NVPTX::SULD_2D_ARRAY_I16_TRAP_R: + return NVPTX::SULD_2D_ARRAY_I16_TRAP_I; + case NVPTX::SULD_2D_ARRAY_I32_TRAP_R: + return NVPTX::SULD_2D_ARRAY_I32_TRAP_I; + case NVPTX::SULD_2D_ARRAY_I64_TRAP_R: + return NVPTX::SULD_2D_ARRAY_I64_TRAP_I; + case NVPTX::SULD_3D_I8_TRAP_R: + return NVPTX::SULD_3D_I8_TRAP_I; + case NVPTX::SULD_3D_I16_TRAP_R: + return NVPTX::SULD_3D_I16_TRAP_I; + case NVPTX::SULD_3D_I32_TRAP_R: + return NVPTX::SULD_3D_I32_TRAP_I; + case NVPTX::SULD_3D_I64_TRAP_R: + return NVPTX::SULD_3D_I64_TRAP_I; + case NVPTX::SULD_1D_V2I8_TRAP_R: + return NVPTX::SULD_1D_V2I8_TRAP_I; + case NVPTX::SULD_1D_V2I16_TRAP_R: + return NVPTX::SULD_1D_V2I16_TRAP_I; + case NVPTX::SULD_1D_V2I32_TRAP_R: + return NVPTX::SULD_1D_V2I32_TRAP_I; + case NVPTX::SULD_1D_V2I64_TRAP_R: + return NVPTX::SULD_1D_V2I64_TRAP_I; + case NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R: + return NVPTX::SULD_1D_ARRAY_V2I8_TRAP_I; + case NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R: + return NVPTX::SULD_1D_ARRAY_V2I16_TRAP_I; + case NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R: + return NVPTX::SULD_1D_ARRAY_V2I32_TRAP_I; + case NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R: + return NVPTX::SULD_1D_ARRAY_V2I64_TRAP_I; + case NVPTX::SULD_2D_V2I8_TRAP_R: + return NVPTX::SULD_2D_V2I8_TRAP_I; + case NVPTX::SULD_2D_V2I16_TRAP_R: + return NVPTX::SULD_2D_V2I16_TRAP_I; + case NVPTX::SULD_2D_V2I32_TRAP_R: + return NVPTX::SULD_2D_V2I32_TRAP_I; + case NVPTX::SULD_2D_V2I64_TRAP_R: + return NVPTX::SULD_2D_V2I64_TRAP_I; + case NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R: + return NVPTX::SULD_2D_ARRAY_V2I8_TRAP_I; + case NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R: + return NVPTX::SULD_2D_ARRAY_V2I16_TRAP_I; + case NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R: + return NVPTX::SULD_2D_ARRAY_V2I32_TRAP_I; + case NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R: + return NVPTX::SULD_2D_ARRAY_V2I64_TRAP_I; + case NVPTX::SULD_3D_V2I8_TRAP_R: + return NVPTX::SULD_3D_V2I8_TRAP_I; + case NVPTX::SULD_3D_V2I16_TRAP_R: + return NVPTX::SULD_3D_V2I16_TRAP_I; + case NVPTX::SULD_3D_V2I32_TRAP_R: + return NVPTX::SULD_3D_V2I32_TRAP_I; + case NVPTX::SULD_3D_V2I64_TRAP_R: + return NVPTX::SULD_3D_V2I64_TRAP_I; + case NVPTX::SULD_1D_V4I8_TRAP_R: + return NVPTX::SULD_1D_V4I8_TRAP_I; + case NVPTX::SULD_1D_V4I16_TRAP_R: + return NVPTX::SULD_1D_V4I16_TRAP_I; + case NVPTX::SULD_1D_V4I32_TRAP_R: + return NVPTX::SULD_1D_V4I32_TRAP_I; + case NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R: + return NVPTX::SULD_1D_ARRAY_V4I8_TRAP_I; + case NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R: + return NVPTX::SULD_1D_ARRAY_V4I16_TRAP_I; + case NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R: + return NVPTX::SULD_1D_ARRAY_V4I32_TRAP_I; + case NVPTX::SULD_2D_V4I8_TRAP_R: + return NVPTX::SULD_2D_V4I8_TRAP_I; + case NVPTX::SULD_2D_V4I16_TRAP_R: + return NVPTX::SULD_2D_V4I16_TRAP_I; + case NVPTX::SULD_2D_V4I32_TRAP_R: + return NVPTX::SULD_2D_V4I32_TRAP_I; + case NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R: + return NVPTX::SULD_2D_ARRAY_V4I8_TRAP_I; + case NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R: + return NVPTX::SULD_2D_ARRAY_V4I16_TRAP_I; + case NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R: + return NVPTX::SULD_2D_ARRAY_V4I32_TRAP_I; + case NVPTX::SULD_3D_V4I8_TRAP_R: + return NVPTX::SULD_3D_V4I8_TRAP_I; + case NVPTX::SULD_3D_V4I16_TRAP_R: + return NVPTX::SULD_3D_V4I16_TRAP_I; + case NVPTX::SULD_3D_V4I32_TRAP_R: + return NVPTX::SULD_3D_V4I32_TRAP_I; + case NVPTX::SULD_1D_I8_ZERO_R: + return NVPTX::SULD_1D_I8_ZERO_I; + case NVPTX::SULD_1D_I16_ZERO_R: + return NVPTX::SULD_1D_I16_ZERO_I; + case NVPTX::SULD_1D_I32_ZERO_R: + return NVPTX::SULD_1D_I32_ZERO_I; + case NVPTX::SULD_1D_I64_ZERO_R: + return NVPTX::SULD_1D_I64_ZERO_I; + case NVPTX::SULD_1D_ARRAY_I8_ZERO_R: + return NVPTX::SULD_1D_ARRAY_I8_ZERO_I; + case NVPTX::SULD_1D_ARRAY_I16_ZERO_R: + return NVPTX::SULD_1D_ARRAY_I16_ZERO_I; + case NVPTX::SULD_1D_ARRAY_I32_ZERO_R: + return NVPTX::SULD_1D_ARRAY_I32_ZERO_I; + case NVPTX::SULD_1D_ARRAY_I64_ZERO_R: + return NVPTX::SULD_1D_ARRAY_I64_ZERO_I; + case NVPTX::SULD_2D_I8_ZERO_R: + return NVPTX::SULD_2D_I8_ZERO_I; + case NVPTX::SULD_2D_I16_ZERO_R: + return NVPTX::SULD_2D_I16_ZERO_I; + case NVPTX::SULD_2D_I32_ZERO_R: + return NVPTX::SULD_2D_I32_ZERO_I; + case NVPTX::SULD_2D_I64_ZERO_R: + return NVPTX::SULD_2D_I64_ZERO_I; + case NVPTX::SULD_2D_ARRAY_I8_ZERO_R: + return NVPTX::SULD_2D_ARRAY_I8_ZERO_I; + case NVPTX::SULD_2D_ARRAY_I16_ZERO_R: + return NVPTX::SULD_2D_ARRAY_I16_ZERO_I; + case NVPTX::SULD_2D_ARRAY_I32_ZERO_R: + return NVPTX::SULD_2D_ARRAY_I32_ZERO_I; + case NVPTX::SULD_2D_ARRAY_I64_ZERO_R: + return NVPTX::SULD_2D_ARRAY_I64_ZERO_I; + case NVPTX::SULD_3D_I8_ZERO_R: + return NVPTX::SULD_3D_I8_ZERO_I; + case NVPTX::SULD_3D_I16_ZERO_R: + return NVPTX::SULD_3D_I16_ZERO_I; + case NVPTX::SULD_3D_I32_ZERO_R: + return NVPTX::SULD_3D_I32_ZERO_I; + case NVPTX::SULD_3D_I64_ZERO_R: + return NVPTX::SULD_3D_I64_ZERO_I; + case NVPTX::SULD_1D_V2I8_ZERO_R: + return NVPTX::SULD_1D_V2I8_ZERO_I; + case NVPTX::SULD_1D_V2I16_ZERO_R: + return NVPTX::SULD_1D_V2I16_ZERO_I; + case NVPTX::SULD_1D_V2I32_ZERO_R: + return NVPTX::SULD_1D_V2I32_ZERO_I; + case NVPTX::SULD_1D_V2I64_ZERO_R: + return NVPTX::SULD_1D_V2I64_ZERO_I; + case NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R: + return NVPTX::SULD_1D_ARRAY_V2I8_ZERO_I; + case NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R: + return NVPTX::SULD_1D_ARRAY_V2I16_ZERO_I; + case NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R: + return NVPTX::SULD_1D_ARRAY_V2I32_ZERO_I; + case NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R: + return NVPTX::SULD_1D_ARRAY_V2I64_ZERO_I; + case NVPTX::SULD_2D_V2I8_ZERO_R: + return NVPTX::SULD_2D_V2I8_ZERO_I; + case NVPTX::SULD_2D_V2I16_ZERO_R: + return NVPTX::SULD_2D_V2I16_ZERO_I; + case NVPTX::SULD_2D_V2I32_ZERO_R: + return NVPTX::SULD_2D_V2I32_ZERO_I; + case NVPTX::SULD_2D_V2I64_ZERO_R: + return NVPTX::SULD_2D_V2I64_ZERO_I; + case NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R: + return NVPTX::SULD_2D_ARRAY_V2I8_ZERO_I; + case NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R: + return NVPTX::SULD_2D_ARRAY_V2I16_ZERO_I; + case NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R: + return NVPTX::SULD_2D_ARRAY_V2I32_ZERO_I; + case NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R: + return NVPTX::SULD_2D_ARRAY_V2I64_ZERO_I; + case NVPTX::SULD_3D_V2I8_ZERO_R: + return NVPTX::SULD_3D_V2I8_ZERO_I; + case NVPTX::SULD_3D_V2I16_ZERO_R: + return NVPTX::SULD_3D_V2I16_ZERO_I; + case NVPTX::SULD_3D_V2I32_ZERO_R: + return NVPTX::SULD_3D_V2I32_ZERO_I; + case NVPTX::SULD_3D_V2I64_ZERO_R: + return NVPTX::SULD_3D_V2I64_ZERO_I; + case NVPTX::SULD_1D_V4I8_ZERO_R: + return NVPTX::SULD_1D_V4I8_ZERO_I; + case NVPTX::SULD_1D_V4I16_ZERO_R: + return NVPTX::SULD_1D_V4I16_ZERO_I; + case NVPTX::SULD_1D_V4I32_ZERO_R: + return NVPTX::SULD_1D_V4I32_ZERO_I; + case NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R: + return NVPTX::SULD_1D_ARRAY_V4I8_ZERO_I; + case NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R: + return NVPTX::SULD_1D_ARRAY_V4I16_ZERO_I; + case NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R: + return NVPTX::SULD_1D_ARRAY_V4I32_ZERO_I; + case NVPTX::SULD_2D_V4I8_ZERO_R: + return NVPTX::SULD_2D_V4I8_ZERO_I; + case NVPTX::SULD_2D_V4I16_ZERO_R: + return NVPTX::SULD_2D_V4I16_ZERO_I; + case NVPTX::SULD_2D_V4I32_ZERO_R: + return NVPTX::SULD_2D_V4I32_ZERO_I; + case NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R: + return NVPTX::SULD_2D_ARRAY_V4I8_ZERO_I; + case NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R: + return NVPTX::SULD_2D_ARRAY_V4I16_ZERO_I; + case NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R: + return NVPTX::SULD_2D_ARRAY_V4I32_ZERO_I; + case NVPTX::SULD_3D_V4I8_ZERO_R: + return NVPTX::SULD_3D_V4I8_ZERO_I; + case NVPTX::SULD_3D_V4I16_ZERO_R: + return NVPTX::SULD_3D_V4I16_ZERO_I; + case NVPTX::SULD_3D_V4I32_ZERO_R: + return NVPTX::SULD_3D_V4I32_ZERO_I; + default: + llvm_unreachable("Unhandled SULD opcode"); + } +} + +static unsigned sustRegisterToIndexOpcode(unsigned RegOC) { + switch (RegOC) { + case NVPTX::SUST_B_1D_B8_CLAMP_R: + return NVPTX::SUST_B_1D_B8_CLAMP_I; + case NVPTX::SUST_B_1D_B16_CLAMP_R: + return NVPTX::SUST_B_1D_B16_CLAMP_I; + case NVPTX::SUST_B_1D_B32_CLAMP_R: + return NVPTX::SUST_B_1D_B32_CLAMP_I; + case NVPTX::SUST_B_1D_B64_CLAMP_R: + return NVPTX::SUST_B_1D_B64_CLAMP_I; + case NVPTX::SUST_B_1D_V2B8_CLAMP_R: + return NVPTX::SUST_B_1D_V2B8_CLAMP_I; + case NVPTX::SUST_B_1D_V2B16_CLAMP_R: + return NVPTX::SUST_B_1D_V2B16_CLAMP_I; + case NVPTX::SUST_B_1D_V2B32_CLAMP_R: + return NVPTX::SUST_B_1D_V2B32_CLAMP_I; + case NVPTX::SUST_B_1D_V2B64_CLAMP_R: + return NVPTX::SUST_B_1D_V2B64_CLAMP_I; + case NVPTX::SUST_B_1D_V4B8_CLAMP_R: + return NVPTX::SUST_B_1D_V4B8_CLAMP_I; + case NVPTX::SUST_B_1D_V4B16_CLAMP_R: + return NVPTX::SUST_B_1D_V4B16_CLAMP_I; + case NVPTX::SUST_B_1D_V4B32_CLAMP_R: + return NVPTX::SUST_B_1D_V4B32_CLAMP_I; + case NVPTX::SUST_B_1D_ARRAY_B8_CLAMP_R: + return NVPTX::SUST_B_1D_ARRAY_B8_CLAMP_I; + case NVPTX::SUST_B_1D_ARRAY_B16_CLAMP_R: + return NVPTX::SUST_B_1D_ARRAY_B16_CLAMP_I; + case NVPTX::SUST_B_1D_ARRAY_B32_CLAMP_R: + return NVPTX::SUST_B_1D_ARRAY_B32_CLAMP_I; + case NVPTX::SUST_B_1D_ARRAY_B64_CLAMP_R: + return NVPTX::SUST_B_1D_ARRAY_B64_CLAMP_I; + case NVPTX::SUST_B_1D_ARRAY_V2B8_CLAMP_R: + return NVPTX::SUST_B_1D_ARRAY_V2B8_CLAMP_I; + case NVPTX::SUST_B_1D_ARRAY_V2B16_CLAMP_R: + return NVPTX::SUST_B_1D_ARRAY_V2B16_CLAMP_I; + case NVPTX::SUST_B_1D_ARRAY_V2B32_CLAMP_R: + return NVPTX::SUST_B_1D_ARRAY_V2B32_CLAMP_I; + case NVPTX::SUST_B_1D_ARRAY_V2B64_CLAMP_R: + return NVPTX::SUST_B_1D_ARRAY_V2B64_CLAMP_I; + case NVPTX::SUST_B_1D_ARRAY_V4B8_CLAMP_R: + return NVPTX::SUST_B_1D_ARRAY_V4B8_CLAMP_I; + case NVPTX::SUST_B_1D_ARRAY_V4B16_CLAMP_R: + return NVPTX::SUST_B_1D_ARRAY_V4B16_CLAMP_I; + case NVPTX::SUST_B_1D_ARRAY_V4B32_CLAMP_R: + return NVPTX::SUST_B_1D_ARRAY_V4B32_CLAMP_I; + case NVPTX::SUST_B_2D_B8_CLAMP_R: + return NVPTX::SUST_B_2D_B8_CLAMP_I; + case NVPTX::SUST_B_2D_B16_CLAMP_R: + return NVPTX::SUST_B_2D_B16_CLAMP_I; + case NVPTX::SUST_B_2D_B32_CLAMP_R: + return NVPTX::SUST_B_2D_B32_CLAMP_I; + case NVPTX::SUST_B_2D_B64_CLAMP_R: + return NVPTX::SUST_B_2D_B64_CLAMP_I; + case NVPTX::SUST_B_2D_V2B8_CLAMP_R: + return NVPTX::SUST_B_2D_V2B8_CLAMP_I; + case NVPTX::SUST_B_2D_V2B16_CLAMP_R: + return NVPTX::SUST_B_2D_V2B16_CLAMP_I; + case NVPTX::SUST_B_2D_V2B32_CLAMP_R: + return NVPTX::SUST_B_2D_V2B32_CLAMP_I; + case NVPTX::SUST_B_2D_V2B64_CLAMP_R: + return NVPTX::SUST_B_2D_V2B64_CLAMP_I; + case NVPTX::SUST_B_2D_V4B8_CLAMP_R: + return NVPTX::SUST_B_2D_V4B8_CLAMP_I; + case NVPTX::SUST_B_2D_V4B16_CLAMP_R: + return NVPTX::SUST_B_2D_V4B16_CLAMP_I; + case NVPTX::SUST_B_2D_V4B32_CLAMP_R: + return NVPTX::SUST_B_2D_V4B32_CLAMP_I; + case NVPTX::SUST_B_2D_ARRAY_B8_CLAMP_R: + return NVPTX::SUST_B_2D_ARRAY_B8_CLAMP_I; + case NVPTX::SUST_B_2D_ARRAY_B16_CLAMP_R: + return NVPTX::SUST_B_2D_ARRAY_B16_CLAMP_I; + case NVPTX::SUST_B_2D_ARRAY_B32_CLAMP_R: + return NVPTX::SUST_B_2D_ARRAY_B32_CLAMP_I; + case NVPTX::SUST_B_2D_ARRAY_B64_CLAMP_R: + return NVPTX::SUST_B_2D_ARRAY_B64_CLAMP_I; + case NVPTX::SUST_B_2D_ARRAY_V2B8_CLAMP_R: + return NVPTX::SUST_B_2D_ARRAY_V2B8_CLAMP_I; + case NVPTX::SUST_B_2D_ARRAY_V2B16_CLAMP_R: + return NVPTX::SUST_B_2D_ARRAY_V2B16_CLAMP_I; + case NVPTX::SUST_B_2D_ARRAY_V2B32_CLAMP_R: + return NVPTX::SUST_B_2D_ARRAY_V2B32_CLAMP_I; + case NVPTX::SUST_B_2D_ARRAY_V2B64_CLAMP_R: + return NVPTX::SUST_B_2D_ARRAY_V2B64_CLAMP_I; + case NVPTX::SUST_B_2D_ARRAY_V4B8_CLAMP_R: + return NVPTX::SUST_B_2D_ARRAY_V4B8_CLAMP_I; + case NVPTX::SUST_B_2D_ARRAY_V4B16_CLAMP_R: + return NVPTX::SUST_B_2D_ARRAY_V4B16_CLAMP_I; + case NVPTX::SUST_B_2D_ARRAY_V4B32_CLAMP_R: + return NVPTX::SUST_B_2D_ARRAY_V4B32_CLAMP_I; + case NVPTX::SUST_B_3D_B8_CLAMP_R: + return NVPTX::SUST_B_3D_B8_CLAMP_I; + case NVPTX::SUST_B_3D_B16_CLAMP_R: + return NVPTX::SUST_B_3D_B16_CLAMP_I; + case NVPTX::SUST_B_3D_B32_CLAMP_R: + return NVPTX::SUST_B_3D_B32_CLAMP_I; + case NVPTX::SUST_B_3D_B64_CLAMP_R: + return NVPTX::SUST_B_3D_B64_CLAMP_I; + case NVPTX::SUST_B_3D_V2B8_CLAMP_R: + return NVPTX::SUST_B_3D_V2B8_CLAMP_I; + case NVPTX::SUST_B_3D_V2B16_CLAMP_R: + return NVPTX::SUST_B_3D_V2B16_CLAMP_I; + case NVPTX::SUST_B_3D_V2B32_CLAMP_R: + return NVPTX::SUST_B_3D_V2B32_CLAMP_I; + case NVPTX::SUST_B_3D_V2B64_CLAMP_R: + return NVPTX::SUST_B_3D_V2B64_CLAMP_I; + case NVPTX::SUST_B_3D_V4B8_CLAMP_R: + return NVPTX::SUST_B_3D_V4B8_CLAMP_I; + case NVPTX::SUST_B_3D_V4B16_CLAMP_R: + return NVPTX::SUST_B_3D_V4B16_CLAMP_I; + case NVPTX::SUST_B_3D_V4B32_CLAMP_R: + return NVPTX::SUST_B_3D_V4B32_CLAMP_I; + case NVPTX::SUST_B_1D_B8_TRAP_R: + return NVPTX::SUST_B_1D_B8_TRAP_I; + case NVPTX::SUST_B_1D_B16_TRAP_R: + return NVPTX::SUST_B_1D_B16_TRAP_I; + case NVPTX::SUST_B_1D_B32_TRAP_R: + return NVPTX::SUST_B_1D_B32_TRAP_I; + case NVPTX::SUST_B_1D_B64_TRAP_R: + return NVPTX::SUST_B_1D_B64_TRAP_I; + case NVPTX::SUST_B_1D_V2B8_TRAP_R: + return NVPTX::SUST_B_1D_V2B8_TRAP_I; + case NVPTX::SUST_B_1D_V2B16_TRAP_R: + return NVPTX::SUST_B_1D_V2B16_TRAP_I; + case NVPTX::SUST_B_1D_V2B32_TRAP_R: + return NVPTX::SUST_B_1D_V2B32_TRAP_I; + case NVPTX::SUST_B_1D_V2B64_TRAP_R: + return NVPTX::SUST_B_1D_V2B64_TRAP_I; + case NVPTX::SUST_B_1D_V4B8_TRAP_R: + return NVPTX::SUST_B_1D_V4B8_TRAP_I; + case NVPTX::SUST_B_1D_V4B16_TRAP_R: + return NVPTX::SUST_B_1D_V4B16_TRAP_I; + case NVPTX::SUST_B_1D_V4B32_TRAP_R: + return NVPTX::SUST_B_1D_V4B32_TRAP_I; + case NVPTX::SUST_B_1D_ARRAY_B8_TRAP_R: + return NVPTX::SUST_B_1D_ARRAY_B8_TRAP_I; + case NVPTX::SUST_B_1D_ARRAY_B16_TRAP_R: + return NVPTX::SUST_B_1D_ARRAY_B16_TRAP_I; + case NVPTX::SUST_B_1D_ARRAY_B32_TRAP_R: + return NVPTX::SUST_B_1D_ARRAY_B32_TRAP_I; + case NVPTX::SUST_B_1D_ARRAY_B64_TRAP_R: + return NVPTX::SUST_B_1D_ARRAY_B64_TRAP_I; + case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP_R: + return NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP_I; + case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP_R: + return NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP_I; + case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP_R: + return NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP_I; + case NVPTX::SUST_B_1D_ARRAY_V2B64_TRAP_R: + return NVPTX::SUST_B_1D_ARRAY_V2B64_TRAP_I; + case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP_R: + return NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP_I; + case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP_R: + return NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP_I; + case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP_R: + return NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP_I; + case NVPTX::SUST_B_2D_B8_TRAP_R: + return NVPTX::SUST_B_2D_B8_TRAP_I; + case NVPTX::SUST_B_2D_B16_TRAP_R: + return NVPTX::SUST_B_2D_B16_TRAP_I; + case NVPTX::SUST_B_2D_B32_TRAP_R: + return NVPTX::SUST_B_2D_B32_TRAP_I; + case NVPTX::SUST_B_2D_B64_TRAP_R: + return NVPTX::SUST_B_2D_B64_TRAP_I; + case NVPTX::SUST_B_2D_V2B8_TRAP_R: + return NVPTX::SUST_B_2D_V2B8_TRAP_I; + case NVPTX::SUST_B_2D_V2B16_TRAP_R: + return NVPTX::SUST_B_2D_V2B16_TRAP_I; + case NVPTX::SUST_B_2D_V2B32_TRAP_R: + return NVPTX::SUST_B_2D_V2B32_TRAP_I; + case NVPTX::SUST_B_2D_V2B64_TRAP_R: + return NVPTX::SUST_B_2D_V2B64_TRAP_I; + case NVPTX::SUST_B_2D_V4B8_TRAP_R: + return NVPTX::SUST_B_2D_V4B8_TRAP_I; + case NVPTX::SUST_B_2D_V4B16_TRAP_R: + return NVPTX::SUST_B_2D_V4B16_TRAP_I; + case NVPTX::SUST_B_2D_V4B32_TRAP_R: + return NVPTX::SUST_B_2D_V4B32_TRAP_I; + case NVPTX::SUST_B_2D_ARRAY_B8_TRAP_R: + return NVPTX::SUST_B_2D_ARRAY_B8_TRAP_I; + case NVPTX::SUST_B_2D_ARRAY_B16_TRAP_R: + return NVPTX::SUST_B_2D_ARRAY_B16_TRAP_I; + case NVPTX::SUST_B_2D_ARRAY_B32_TRAP_R: + return NVPTX::SUST_B_2D_ARRAY_B32_TRAP_I; + case NVPTX::SUST_B_2D_ARRAY_B64_TRAP_R: + return NVPTX::SUST_B_2D_ARRAY_B64_TRAP_I; + case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP_R: + return NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP_I; + case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP_R: + return NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP_I; + case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP_R: + return NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP_I; + case NVPTX::SUST_B_2D_ARRAY_V2B64_TRAP_R: + return NVPTX::SUST_B_2D_ARRAY_V2B64_TRAP_I; + case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP_R: + return NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP_I; + case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP_R: + return NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP_I; + case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP_R: + return NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP_I; + case NVPTX::SUST_B_3D_B8_TRAP_R: + return NVPTX::SUST_B_3D_B8_TRAP_I; + case NVPTX::SUST_B_3D_B16_TRAP_R: + return NVPTX::SUST_B_3D_B16_TRAP_I; + case NVPTX::SUST_B_3D_B32_TRAP_R: + return NVPTX::SUST_B_3D_B32_TRAP_I; + case NVPTX::SUST_B_3D_B64_TRAP_R: + return NVPTX::SUST_B_3D_B64_TRAP_I; + case NVPTX::SUST_B_3D_V2B8_TRAP_R: + return NVPTX::SUST_B_3D_V2B8_TRAP_I; + case NVPTX::SUST_B_3D_V2B16_TRAP_R: + return NVPTX::SUST_B_3D_V2B16_TRAP_I; + case NVPTX::SUST_B_3D_V2B32_TRAP_R: + return NVPTX::SUST_B_3D_V2B32_TRAP_I; + case NVPTX::SUST_B_3D_V2B64_TRAP_R: + return NVPTX::SUST_B_3D_V2B64_TRAP_I; + case NVPTX::SUST_B_3D_V4B8_TRAP_R: + return NVPTX::SUST_B_3D_V4B8_TRAP_I; + case NVPTX::SUST_B_3D_V4B16_TRAP_R: + return NVPTX::SUST_B_3D_V4B16_TRAP_I; + case NVPTX::SUST_B_3D_V4B32_TRAP_R: + return NVPTX::SUST_B_3D_V4B32_TRAP_I; + case NVPTX::SUST_B_1D_B8_ZERO_R: + return NVPTX::SUST_B_1D_B8_ZERO_I; + case NVPTX::SUST_B_1D_B16_ZERO_R: + return NVPTX::SUST_B_1D_B16_ZERO_I; + case NVPTX::SUST_B_1D_B32_ZERO_R: + return NVPTX::SUST_B_1D_B32_ZERO_I; + case NVPTX::SUST_B_1D_B64_ZERO_R: + return NVPTX::SUST_B_1D_B64_ZERO_I; + case NVPTX::SUST_B_1D_V2B8_ZERO_R: + return NVPTX::SUST_B_1D_V2B8_ZERO_I; + case NVPTX::SUST_B_1D_V2B16_ZERO_R: + return NVPTX::SUST_B_1D_V2B16_ZERO_I; + case NVPTX::SUST_B_1D_V2B32_ZERO_R: + return NVPTX::SUST_B_1D_V2B32_ZERO_I; + case NVPTX::SUST_B_1D_V2B64_ZERO_R: + return NVPTX::SUST_B_1D_V2B64_ZERO_I; + case NVPTX::SUST_B_1D_V4B8_ZERO_R: + return NVPTX::SUST_B_1D_V4B8_ZERO_I; + case NVPTX::SUST_B_1D_V4B16_ZERO_R: + return NVPTX::SUST_B_1D_V4B16_ZERO_I; + case NVPTX::SUST_B_1D_V4B32_ZERO_R: + return NVPTX::SUST_B_1D_V4B32_ZERO_I; + case NVPTX::SUST_B_1D_ARRAY_B8_ZERO_R: + return NVPTX::SUST_B_1D_ARRAY_B8_ZERO_I; + case NVPTX::SUST_B_1D_ARRAY_B16_ZERO_R: + return NVPTX::SUST_B_1D_ARRAY_B16_ZERO_I; + case NVPTX::SUST_B_1D_ARRAY_B32_ZERO_R: + return NVPTX::SUST_B_1D_ARRAY_B32_ZERO_I; + case NVPTX::SUST_B_1D_ARRAY_B64_ZERO_R: + return NVPTX::SUST_B_1D_ARRAY_B64_ZERO_I; + case NVPTX::SUST_B_1D_ARRAY_V2B8_ZERO_R: + return NVPTX::SUST_B_1D_ARRAY_V2B8_ZERO_I; + case NVPTX::SUST_B_1D_ARRAY_V2B16_ZERO_R: + return NVPTX::SUST_B_1D_ARRAY_V2B16_ZERO_I; + case NVPTX::SUST_B_1D_ARRAY_V2B32_ZERO_R: + return NVPTX::SUST_B_1D_ARRAY_V2B32_ZERO_I; + case NVPTX::SUST_B_1D_ARRAY_V2B64_ZERO_R: + return NVPTX::SUST_B_1D_ARRAY_V2B64_ZERO_I; + case NVPTX::SUST_B_1D_ARRAY_V4B8_ZERO_R: + return NVPTX::SUST_B_1D_ARRAY_V4B8_ZERO_I; + case NVPTX::SUST_B_1D_ARRAY_V4B16_ZERO_R: + return NVPTX::SUST_B_1D_ARRAY_V4B16_ZERO_I; + case NVPTX::SUST_B_1D_ARRAY_V4B32_ZERO_R: + return NVPTX::SUST_B_1D_ARRAY_V4B32_ZERO_I; + case NVPTX::SUST_B_2D_B8_ZERO_R: + return NVPTX::SUST_B_2D_B8_ZERO_I; + case NVPTX::SUST_B_2D_B16_ZERO_R: + return NVPTX::SUST_B_2D_B16_ZERO_I; + case NVPTX::SUST_B_2D_B32_ZERO_R: + return NVPTX::SUST_B_2D_B32_ZERO_I; + case NVPTX::SUST_B_2D_B64_ZERO_R: + return NVPTX::SUST_B_2D_B64_ZERO_I; + case NVPTX::SUST_B_2D_V2B8_ZERO_R: + return NVPTX::SUST_B_2D_V2B8_ZERO_I; + case NVPTX::SUST_B_2D_V2B16_ZERO_R: + return NVPTX::SUST_B_2D_V2B16_ZERO_I; + case NVPTX::SUST_B_2D_V2B32_ZERO_R: + return NVPTX::SUST_B_2D_V2B32_ZERO_I; + case NVPTX::SUST_B_2D_V2B64_ZERO_R: + return NVPTX::SUST_B_2D_V2B64_ZERO_I; + case NVPTX::SUST_B_2D_V4B8_ZERO_R: + return NVPTX::SUST_B_2D_V4B8_ZERO_I; + case NVPTX::SUST_B_2D_V4B16_ZERO_R: + return NVPTX::SUST_B_2D_V4B16_ZERO_I; + case NVPTX::SUST_B_2D_V4B32_ZERO_R: + return NVPTX::SUST_B_2D_V4B32_ZERO_I; + case NVPTX::SUST_B_2D_ARRAY_B8_ZERO_R: + return NVPTX::SUST_B_2D_ARRAY_B8_ZERO_I; + case NVPTX::SUST_B_2D_ARRAY_B16_ZERO_R: + return NVPTX::SUST_B_2D_ARRAY_B16_ZERO_I; + case NVPTX::SUST_B_2D_ARRAY_B32_ZERO_R: + return NVPTX::SUST_B_2D_ARRAY_B32_ZERO_I; + case NVPTX::SUST_B_2D_ARRAY_B64_ZERO_R: + return NVPTX::SUST_B_2D_ARRAY_B64_ZERO_I; + case NVPTX::SUST_B_2D_ARRAY_V2B8_ZERO_R: + return NVPTX::SUST_B_2D_ARRAY_V2B8_ZERO_I; + case NVPTX::SUST_B_2D_ARRAY_V2B16_ZERO_R: + return NVPTX::SUST_B_2D_ARRAY_V2B16_ZERO_I; + case NVPTX::SUST_B_2D_ARRAY_V2B32_ZERO_R: + return NVPTX::SUST_B_2D_ARRAY_V2B32_ZERO_I; + case NVPTX::SUST_B_2D_ARRAY_V2B64_ZERO_R: + return NVPTX::SUST_B_2D_ARRAY_V2B64_ZERO_I; + case NVPTX::SUST_B_2D_ARRAY_V4B8_ZERO_R: + return NVPTX::SUST_B_2D_ARRAY_V4B8_ZERO_I; + case NVPTX::SUST_B_2D_ARRAY_V4B16_ZERO_R: + return NVPTX::SUST_B_2D_ARRAY_V4B16_ZERO_I; + case NVPTX::SUST_B_2D_ARRAY_V4B32_ZERO_R: + return NVPTX::SUST_B_2D_ARRAY_V4B32_ZERO_I; + case NVPTX::SUST_B_3D_B8_ZERO_R: + return NVPTX::SUST_B_3D_B8_ZERO_I; + case NVPTX::SUST_B_3D_B16_ZERO_R: + return NVPTX::SUST_B_3D_B16_ZERO_I; + case NVPTX::SUST_B_3D_B32_ZERO_R: + return NVPTX::SUST_B_3D_B32_ZERO_I; + case NVPTX::SUST_B_3D_B64_ZERO_R: + return NVPTX::SUST_B_3D_B64_ZERO_I; + case NVPTX::SUST_B_3D_V2B8_ZERO_R: + return NVPTX::SUST_B_3D_V2B8_ZERO_I; + case NVPTX::SUST_B_3D_V2B16_ZERO_R: + return NVPTX::SUST_B_3D_V2B16_ZERO_I; + case NVPTX::SUST_B_3D_V2B32_ZERO_R: + return NVPTX::SUST_B_3D_V2B32_ZERO_I; + case NVPTX::SUST_B_3D_V2B64_ZERO_R: + return NVPTX::SUST_B_3D_V2B64_ZERO_I; + case NVPTX::SUST_B_3D_V4B8_ZERO_R: + return NVPTX::SUST_B_3D_V4B8_ZERO_I; + case NVPTX::SUST_B_3D_V4B16_ZERO_R: + return NVPTX::SUST_B_3D_V4B16_ZERO_I; + case NVPTX::SUST_B_3D_V4B32_ZERO_R: + return NVPTX::SUST_B_3D_V4B32_ZERO_I; + case NVPTX::SUST_P_1D_B8_TRAP_R: + return NVPTX::SUST_P_1D_B8_TRAP_I; + case NVPTX::SUST_P_1D_B16_TRAP_R: + return NVPTX::SUST_P_1D_B16_TRAP_I; + case NVPTX::SUST_P_1D_B32_TRAP_R: + return NVPTX::SUST_P_1D_B32_TRAP_I; + case NVPTX::SUST_P_1D_V2B8_TRAP_R: + return NVPTX::SUST_P_1D_V2B8_TRAP_I; + case NVPTX::SUST_P_1D_V2B16_TRAP_R: + return NVPTX::SUST_P_1D_V2B16_TRAP_I; + case NVPTX::SUST_P_1D_V2B32_TRAP_R: + return NVPTX::SUST_P_1D_V2B32_TRAP_I; + case NVPTX::SUST_P_1D_V4B8_TRAP_R: + return NVPTX::SUST_P_1D_V4B8_TRAP_I; + case NVPTX::SUST_P_1D_V4B16_TRAP_R: + return NVPTX::SUST_P_1D_V4B16_TRAP_I; + case NVPTX::SUST_P_1D_V4B32_TRAP_R: + return NVPTX::SUST_P_1D_V4B32_TRAP_I; + case NVPTX::SUST_P_1D_ARRAY_B8_TRAP_R: + return NVPTX::SUST_P_1D_ARRAY_B8_TRAP_I; + case NVPTX::SUST_P_1D_ARRAY_B16_TRAP_R: + return NVPTX::SUST_P_1D_ARRAY_B16_TRAP_I; + case NVPTX::SUST_P_1D_ARRAY_B32_TRAP_R: + return NVPTX::SUST_P_1D_ARRAY_B32_TRAP_I; + case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP_R: + return NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP_I; + case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP_R: + return NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP_I; + case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP_R: + return NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP_I; + case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP_R: + return NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP_I; + case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP_R: + return NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP_I; + case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP_R: + return NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP_I; + case NVPTX::SUST_P_2D_B8_TRAP_R: + return NVPTX::SUST_P_2D_B8_TRAP_I; + case NVPTX::SUST_P_2D_B16_TRAP_R: + return NVPTX::SUST_P_2D_B16_TRAP_I; + case NVPTX::SUST_P_2D_B32_TRAP_R: + return NVPTX::SUST_P_2D_B32_TRAP_I; + case NVPTX::SUST_P_2D_V2B8_TRAP_R: + return NVPTX::SUST_P_2D_V2B8_TRAP_I; + case NVPTX::SUST_P_2D_V2B16_TRAP_R: + return NVPTX::SUST_P_2D_V2B16_TRAP_I; + case NVPTX::SUST_P_2D_V2B32_TRAP_R: + return NVPTX::SUST_P_2D_V2B32_TRAP_I; + case NVPTX::SUST_P_2D_V4B8_TRAP_R: + return NVPTX::SUST_P_2D_V4B8_TRAP_I; + case NVPTX::SUST_P_2D_V4B16_TRAP_R: + return NVPTX::SUST_P_2D_V4B16_TRAP_I; + case NVPTX::SUST_P_2D_V4B32_TRAP_R: + return NVPTX::SUST_P_2D_V4B32_TRAP_I; + case NVPTX::SUST_P_2D_ARRAY_B8_TRAP_R: + return NVPTX::SUST_P_2D_ARRAY_B8_TRAP_I; + case NVPTX::SUST_P_2D_ARRAY_B16_TRAP_R: + return NVPTX::SUST_P_2D_ARRAY_B16_TRAP_I; + case NVPTX::SUST_P_2D_ARRAY_B32_TRAP_R: + return NVPTX::SUST_P_2D_ARRAY_B32_TRAP_I; + case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP_R: + return NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP_I; + case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP_R: + return NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP_I; + case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP_R: + return NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP_I; + case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP_R: + return NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP_I; + case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP_R: + return NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP_I; + case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP_R: + return NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP_I; + case NVPTX::SUST_P_3D_B8_TRAP_R: + return NVPTX::SUST_P_3D_B8_TRAP_I; + case NVPTX::SUST_P_3D_B16_TRAP_R: + return NVPTX::SUST_P_3D_B16_TRAP_I; + case NVPTX::SUST_P_3D_B32_TRAP_R: + return NVPTX::SUST_P_3D_B32_TRAP_I; + case NVPTX::SUST_P_3D_V2B8_TRAP_R: + return NVPTX::SUST_P_3D_V2B8_TRAP_I; + case NVPTX::SUST_P_3D_V2B16_TRAP_R: + return NVPTX::SUST_P_3D_V2B16_TRAP_I; + case NVPTX::SUST_P_3D_V2B32_TRAP_R: + return NVPTX::SUST_P_3D_V2B32_TRAP_I; + case NVPTX::SUST_P_3D_V4B8_TRAP_R: + return NVPTX::SUST_P_3D_V4B8_TRAP_I; + case NVPTX::SUST_P_3D_V4B16_TRAP_R: + return NVPTX::SUST_P_3D_V4B16_TRAP_I; + case NVPTX::SUST_P_3D_V4B32_TRAP_R: + return NVPTX::SUST_P_3D_V4B32_TRAP_I; + default: + llvm_unreachable("Unhandled SUST opcode"); + } +} + +static unsigned texRegisterToIndexOpcode(unsigned RegOC) { + switch (RegOC) { + case NVPTX::TEX_1D_F32_S32_RR: + return NVPTX::TEX_1D_F32_S32_IR; + case NVPTX::TEX_1D_F32_S32_RI: + return NVPTX::TEX_1D_F32_S32_II; + case NVPTX::TEX_1D_F32_F32_RR: + return NVPTX::TEX_1D_F32_F32_IR; + case NVPTX::TEX_1D_F32_F32_RI: + return NVPTX::TEX_1D_F32_F32_II; + case NVPTX::TEX_1D_F32_F32_LEVEL_RR: + return NVPTX::TEX_1D_F32_F32_LEVEL_IR; + case NVPTX::TEX_1D_F32_F32_LEVEL_RI: + return NVPTX::TEX_1D_F32_F32_LEVEL_II; + case NVPTX::TEX_1D_F32_F32_GRAD_RR: + return NVPTX::TEX_1D_F32_F32_GRAD_IR; + case NVPTX::TEX_1D_F32_F32_GRAD_RI: + return NVPTX::TEX_1D_F32_F32_GRAD_II; + case NVPTX::TEX_1D_S32_S32_RR: + return NVPTX::TEX_1D_S32_S32_IR; + case NVPTX::TEX_1D_S32_S32_RI: + return NVPTX::TEX_1D_S32_S32_II; + case NVPTX::TEX_1D_S32_F32_RR: + return NVPTX::TEX_1D_S32_F32_IR; + case NVPTX::TEX_1D_S32_F32_RI: + return NVPTX::TEX_1D_S32_F32_II; + case NVPTX::TEX_1D_S32_F32_LEVEL_RR: + return NVPTX::TEX_1D_S32_F32_LEVEL_IR; + case NVPTX::TEX_1D_S32_F32_LEVEL_RI: + return NVPTX::TEX_1D_S32_F32_LEVEL_II; + case NVPTX::TEX_1D_S32_F32_GRAD_RR: + return NVPTX::TEX_1D_S32_F32_GRAD_IR; + case NVPTX::TEX_1D_S32_F32_GRAD_RI: + return NVPTX::TEX_1D_S32_F32_GRAD_II; + case NVPTX::TEX_1D_U32_S32_RR: + return NVPTX::TEX_1D_U32_S32_IR; + case NVPTX::TEX_1D_U32_S32_RI: + return NVPTX::TEX_1D_U32_S32_II; + case NVPTX::TEX_1D_U32_F32_RR: + return NVPTX::TEX_1D_U32_F32_IR; + case NVPTX::TEX_1D_U32_F32_RI: + return NVPTX::TEX_1D_U32_F32_II; + case NVPTX::TEX_1D_U32_F32_LEVEL_RR: + return NVPTX::TEX_1D_U32_F32_LEVEL_IR; + case NVPTX::TEX_1D_U32_F32_LEVEL_RI: + return NVPTX::TEX_1D_U32_F32_LEVEL_II; + case NVPTX::TEX_1D_U32_F32_GRAD_RR: + return NVPTX::TEX_1D_U32_F32_GRAD_IR; + case NVPTX::TEX_1D_U32_F32_GRAD_RI: + return NVPTX::TEX_1D_U32_F32_GRAD_II; + case NVPTX::TEX_1D_ARRAY_F32_S32_RR: + return NVPTX::TEX_1D_ARRAY_F32_S32_IR; + case NVPTX::TEX_1D_ARRAY_F32_S32_RI: + return NVPTX::TEX_1D_ARRAY_F32_S32_II; + case NVPTX::TEX_1D_ARRAY_F32_F32_RR: + return NVPTX::TEX_1D_ARRAY_F32_F32_IR; + case NVPTX::TEX_1D_ARRAY_F32_F32_RI: + return NVPTX::TEX_1D_ARRAY_F32_F32_II; + case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR: + return NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_IR; + case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RI: + return NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_II; + case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR: + return NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_IR; + case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RI: + return NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_II; + case NVPTX::TEX_1D_ARRAY_S32_S32_RR: + return NVPTX::TEX_1D_ARRAY_S32_S32_IR; + case NVPTX::TEX_1D_ARRAY_S32_S32_RI: + return NVPTX::TEX_1D_ARRAY_S32_S32_II; + case NVPTX::TEX_1D_ARRAY_S32_F32_RR: + return NVPTX::TEX_1D_ARRAY_S32_F32_IR; + case NVPTX::TEX_1D_ARRAY_S32_F32_RI: + return NVPTX::TEX_1D_ARRAY_S32_F32_II; + case NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR: + return NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_IR; + case NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RI: + return NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_II; + case NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR: + return NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_IR; + case NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RI: + return NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_II; + case NVPTX::TEX_1D_ARRAY_U32_S32_RR: + return NVPTX::TEX_1D_ARRAY_U32_S32_IR; + case NVPTX::TEX_1D_ARRAY_U32_S32_RI: + return NVPTX::TEX_1D_ARRAY_U32_S32_II; + case NVPTX::TEX_1D_ARRAY_U32_F32_RR: + return NVPTX::TEX_1D_ARRAY_U32_F32_IR; + case NVPTX::TEX_1D_ARRAY_U32_F32_RI: + return NVPTX::TEX_1D_ARRAY_U32_F32_II; + case NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR: + return NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_IR; + case NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RI: + return NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_II; + case NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR: + return NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_IR; + case NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RI: + return NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_II; + case NVPTX::TEX_2D_F32_S32_RR: + return NVPTX::TEX_2D_F32_S32_IR; + case NVPTX::TEX_2D_F32_S32_RI: + return NVPTX::TEX_2D_F32_S32_II; + case NVPTX::TEX_2D_F32_F32_RR: + return NVPTX::TEX_2D_F32_F32_IR; + case NVPTX::TEX_2D_F32_F32_RI: + return NVPTX::TEX_2D_F32_F32_II; + case NVPTX::TEX_2D_F32_F32_LEVEL_RR: + return NVPTX::TEX_2D_F32_F32_LEVEL_IR; + case NVPTX::TEX_2D_F32_F32_LEVEL_RI: + return NVPTX::TEX_2D_F32_F32_LEVEL_II; + case NVPTX::TEX_2D_F32_F32_GRAD_RR: + return NVPTX::TEX_2D_F32_F32_GRAD_IR; + case NVPTX::TEX_2D_F32_F32_GRAD_RI: + return NVPTX::TEX_2D_F32_F32_GRAD_II; + case NVPTX::TEX_2D_S32_S32_RR: + return NVPTX::TEX_2D_S32_S32_IR; + case NVPTX::TEX_2D_S32_S32_RI: + return NVPTX::TEX_2D_S32_S32_II; + case NVPTX::TEX_2D_S32_F32_RR: + return NVPTX::TEX_2D_S32_F32_IR; + case NVPTX::TEX_2D_S32_F32_RI: + return NVPTX::TEX_2D_S32_F32_II; + case NVPTX::TEX_2D_S32_F32_LEVEL_RR: + return NVPTX::TEX_2D_S32_F32_LEVEL_IR; + case NVPTX::TEX_2D_S32_F32_LEVEL_RI: + return NVPTX::TEX_2D_S32_F32_LEVEL_II; + case NVPTX::TEX_2D_S32_F32_GRAD_RR: + return NVPTX::TEX_2D_S32_F32_GRAD_IR; + case NVPTX::TEX_2D_S32_F32_GRAD_RI: + return NVPTX::TEX_2D_S32_F32_GRAD_II; + case NVPTX::TEX_2D_U32_S32_RR: + return NVPTX::TEX_2D_U32_S32_IR; + case NVPTX::TEX_2D_U32_S32_RI: + return NVPTX::TEX_2D_U32_S32_II; + case NVPTX::TEX_2D_U32_F32_RR: + return NVPTX::TEX_2D_U32_F32_IR; + case NVPTX::TEX_2D_U32_F32_RI: + return NVPTX::TEX_2D_U32_F32_II; + case NVPTX::TEX_2D_U32_F32_LEVEL_RR: + return NVPTX::TEX_2D_U32_F32_LEVEL_IR; + case NVPTX::TEX_2D_U32_F32_LEVEL_RI: + return NVPTX::TEX_2D_U32_F32_LEVEL_II; + case NVPTX::TEX_2D_U32_F32_GRAD_RR: + return NVPTX::TEX_2D_U32_F32_GRAD_IR; + case NVPTX::TEX_2D_U32_F32_GRAD_RI: + return NVPTX::TEX_2D_U32_F32_GRAD_II; + case NVPTX::TEX_2D_ARRAY_F32_S32_RR: + return NVPTX::TEX_2D_ARRAY_F32_S32_IR; + case NVPTX::TEX_2D_ARRAY_F32_S32_RI: + return NVPTX::TEX_2D_ARRAY_F32_S32_II; + case NVPTX::TEX_2D_ARRAY_F32_F32_RR: + return NVPTX::TEX_2D_ARRAY_F32_F32_IR; + case NVPTX::TEX_2D_ARRAY_F32_F32_RI: + return NVPTX::TEX_2D_ARRAY_F32_F32_II; + case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR: + return NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_IR; + case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RI: + return NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_II; + case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR: + return NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_IR; + case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RI: + return NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_II; + case NVPTX::TEX_2D_ARRAY_S32_S32_RR: + return NVPTX::TEX_2D_ARRAY_S32_S32_IR; + case NVPTX::TEX_2D_ARRAY_S32_S32_RI: + return NVPTX::TEX_2D_ARRAY_S32_S32_II; + case NVPTX::TEX_2D_ARRAY_S32_F32_RR: + return NVPTX::TEX_2D_ARRAY_S32_F32_IR; + case NVPTX::TEX_2D_ARRAY_S32_F32_RI: + return NVPTX::TEX_2D_ARRAY_S32_F32_II; + case NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR: + return NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_IR; + case NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RI: + return NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_II; + case NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR: + return NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_IR; + case NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RI: + return NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_II; + case NVPTX::TEX_2D_ARRAY_U32_S32_RR: + return NVPTX::TEX_2D_ARRAY_U32_S32_IR; + case NVPTX::TEX_2D_ARRAY_U32_S32_RI: + return NVPTX::TEX_2D_ARRAY_U32_S32_II; + case NVPTX::TEX_2D_ARRAY_U32_F32_RR: + return NVPTX::TEX_2D_ARRAY_U32_F32_IR; + case NVPTX::TEX_2D_ARRAY_U32_F32_RI: + return NVPTX::TEX_2D_ARRAY_U32_F32_II; + case NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR: + return NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_IR; + case NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RI: + return NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_II; + case NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR: + return NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_IR; + case NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RI: + return NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_II; + case NVPTX::TEX_3D_F32_S32_RR: + return NVPTX::TEX_3D_F32_S32_IR; + case NVPTX::TEX_3D_F32_S32_RI: + return NVPTX::TEX_3D_F32_S32_II; + case NVPTX::TEX_3D_F32_F32_RR: + return NVPTX::TEX_3D_F32_F32_IR; + case NVPTX::TEX_3D_F32_F32_RI: + return NVPTX::TEX_3D_F32_F32_II; + case NVPTX::TEX_3D_F32_F32_LEVEL_RR: + return NVPTX::TEX_3D_F32_F32_LEVEL_IR; + case NVPTX::TEX_3D_F32_F32_LEVEL_RI: + return NVPTX::TEX_3D_F32_F32_LEVEL_II; + case NVPTX::TEX_3D_F32_F32_GRAD_RR: + return NVPTX::TEX_3D_F32_F32_GRAD_IR; + case NVPTX::TEX_3D_F32_F32_GRAD_RI: + return NVPTX::TEX_3D_F32_F32_GRAD_II; + case NVPTX::TEX_3D_S32_S32_RR: + return NVPTX::TEX_3D_S32_S32_IR; + case NVPTX::TEX_3D_S32_S32_RI: + return NVPTX::TEX_3D_S32_S32_II; + case NVPTX::TEX_3D_S32_F32_RR: + return NVPTX::TEX_3D_S32_F32_IR; + case NVPTX::TEX_3D_S32_F32_RI: + return NVPTX::TEX_3D_S32_F32_II; + case NVPTX::TEX_3D_S32_F32_LEVEL_RR: + return NVPTX::TEX_3D_S32_F32_LEVEL_IR; + case NVPTX::TEX_3D_S32_F32_LEVEL_RI: + return NVPTX::TEX_3D_S32_F32_LEVEL_II; + case NVPTX::TEX_3D_S32_F32_GRAD_RR: + return NVPTX::TEX_3D_S32_F32_GRAD_IR; + case NVPTX::TEX_3D_S32_F32_GRAD_RI: + return NVPTX::TEX_3D_S32_F32_GRAD_II; + case NVPTX::TEX_3D_U32_S32_RR: + return NVPTX::TEX_3D_U32_S32_IR; + case NVPTX::TEX_3D_U32_S32_RI: + return NVPTX::TEX_3D_U32_S32_II; + case NVPTX::TEX_3D_U32_F32_RR: + return NVPTX::TEX_3D_U32_F32_IR; + case NVPTX::TEX_3D_U32_F32_RI: + return NVPTX::TEX_3D_U32_F32_II; + case NVPTX::TEX_3D_U32_F32_LEVEL_RR: + return NVPTX::TEX_3D_U32_F32_LEVEL_IR; + case NVPTX::TEX_3D_U32_F32_LEVEL_RI: + return NVPTX::TEX_3D_U32_F32_LEVEL_II; + case NVPTX::TEX_3D_U32_F32_GRAD_RR: + return NVPTX::TEX_3D_U32_F32_GRAD_IR; + case NVPTX::TEX_3D_U32_F32_GRAD_RI: + return NVPTX::TEX_3D_U32_F32_GRAD_II; + case NVPTX::TEX_CUBE_F32_F32_RR: + return NVPTX::TEX_CUBE_F32_F32_IR; + case NVPTX::TEX_CUBE_F32_F32_RI: + return NVPTX::TEX_CUBE_F32_F32_II; + case NVPTX::TEX_CUBE_F32_F32_LEVEL_RR: + return NVPTX::TEX_CUBE_F32_F32_LEVEL_IR; + case NVPTX::TEX_CUBE_F32_F32_LEVEL_RI: + return NVPTX::TEX_CUBE_F32_F32_LEVEL_II; + case NVPTX::TEX_CUBE_S32_F32_RR: + return NVPTX::TEX_CUBE_S32_F32_IR; + case NVPTX::TEX_CUBE_S32_F32_RI: + return NVPTX::TEX_CUBE_S32_F32_II; + case NVPTX::TEX_CUBE_S32_F32_LEVEL_RR: + return NVPTX::TEX_CUBE_S32_F32_LEVEL_IR; + case NVPTX::TEX_CUBE_S32_F32_LEVEL_RI: + return NVPTX::TEX_CUBE_S32_F32_LEVEL_II; + case NVPTX::TEX_CUBE_U32_F32_RR: + return NVPTX::TEX_CUBE_U32_F32_IR; + case NVPTX::TEX_CUBE_U32_F32_RI: + return NVPTX::TEX_CUBE_U32_F32_II; + case NVPTX::TEX_CUBE_U32_F32_LEVEL_RR: + return NVPTX::TEX_CUBE_U32_F32_LEVEL_IR; + case NVPTX::TEX_CUBE_U32_F32_LEVEL_RI: + return NVPTX::TEX_CUBE_U32_F32_LEVEL_II; + case NVPTX::TEX_CUBE_ARRAY_F32_F32_RR: + return NVPTX::TEX_CUBE_ARRAY_F32_F32_IR; + case NVPTX::TEX_CUBE_ARRAY_F32_F32_RI: + return NVPTX::TEX_CUBE_ARRAY_F32_F32_II; + case NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR: + return NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_IR; + case NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RI: + return NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_II; + case NVPTX::TEX_CUBE_ARRAY_S32_F32_RR: + return NVPTX::TEX_CUBE_ARRAY_S32_F32_IR; + case NVPTX::TEX_CUBE_ARRAY_S32_F32_RI: + return NVPTX::TEX_CUBE_ARRAY_S32_F32_II; + case NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR: + return NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_IR; + case NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RI: + return NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_II; + case NVPTX::TEX_CUBE_ARRAY_U32_F32_RR: + return NVPTX::TEX_CUBE_ARRAY_U32_F32_IR; + case NVPTX::TEX_CUBE_ARRAY_U32_F32_RI: + return NVPTX::TEX_CUBE_ARRAY_U32_F32_II; + case NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR: + return NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_IR; + case NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RI: + return NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_II; + case NVPTX::TLD4_R_2D_F32_F32_RR: + return NVPTX::TLD4_R_2D_F32_F32_IR; + case NVPTX::TLD4_R_2D_F32_F32_RI: + return NVPTX::TLD4_R_2D_F32_F32_II; + case NVPTX::TLD4_G_2D_F32_F32_RR: + return NVPTX::TLD4_G_2D_F32_F32_IR; + case NVPTX::TLD4_G_2D_F32_F32_RI: + return NVPTX::TLD4_G_2D_F32_F32_II; + case NVPTX::TLD4_B_2D_F32_F32_RR: + return NVPTX::TLD4_B_2D_F32_F32_IR; + case NVPTX::TLD4_B_2D_F32_F32_RI: + return NVPTX::TLD4_B_2D_F32_F32_II; + case NVPTX::TLD4_A_2D_F32_F32_RR: + return NVPTX::TLD4_A_2D_F32_F32_IR; + case NVPTX::TLD4_A_2D_F32_F32_RI: + return NVPTX::TLD4_A_2D_F32_F32_II; + case NVPTX::TLD4_R_2D_S32_F32_RR: + return NVPTX::TLD4_R_2D_S32_F32_IR; + case NVPTX::TLD4_R_2D_S32_F32_RI: + return NVPTX::TLD4_R_2D_S32_F32_II; + case NVPTX::TLD4_G_2D_S32_F32_RR: + return NVPTX::TLD4_G_2D_S32_F32_IR; + case NVPTX::TLD4_G_2D_S32_F32_RI: + return NVPTX::TLD4_G_2D_S32_F32_II; + case NVPTX::TLD4_B_2D_S32_F32_RR: + return NVPTX::TLD4_B_2D_S32_F32_IR; + case NVPTX::TLD4_B_2D_S32_F32_RI: + return NVPTX::TLD4_B_2D_S32_F32_II; + case NVPTX::TLD4_A_2D_S32_F32_RR: + return NVPTX::TLD4_A_2D_S32_F32_IR; + case NVPTX::TLD4_A_2D_S32_F32_RI: + return NVPTX::TLD4_A_2D_S32_F32_II; + case NVPTX::TLD4_R_2D_U32_F32_RR: + return NVPTX::TLD4_R_2D_U32_F32_IR; + case NVPTX::TLD4_R_2D_U32_F32_RI: + return NVPTX::TLD4_R_2D_U32_F32_II; + case NVPTX::TLD4_G_2D_U32_F32_RR: + return NVPTX::TLD4_G_2D_U32_F32_IR; + case NVPTX::TLD4_G_2D_U32_F32_RI: + return NVPTX::TLD4_G_2D_U32_F32_II; + case NVPTX::TLD4_B_2D_U32_F32_RR: + return NVPTX::TLD4_B_2D_U32_F32_IR; + case NVPTX::TLD4_B_2D_U32_F32_RI: + return NVPTX::TLD4_B_2D_U32_F32_II; + case NVPTX::TLD4_A_2D_U32_F32_RR: + return NVPTX::TLD4_A_2D_U32_F32_IR; + case NVPTX::TLD4_A_2D_U32_F32_RI: + return NVPTX::TLD4_A_2D_U32_F32_II; + case NVPTX::TEX_UNIFIED_1D_F32_S32_R: + return NVPTX::TEX_UNIFIED_1D_F32_S32_I; + case NVPTX::TEX_UNIFIED_1D_F32_F32_R: + return NVPTX::TEX_UNIFIED_1D_F32_F32_I; + case NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R: + return NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_I; + case NVPTX::TEX_UNIFIED_1D_S32_S32_R: + return NVPTX::TEX_UNIFIED_1D_S32_S32_I; + case NVPTX::TEX_UNIFIED_1D_S32_F32_R: + return NVPTX::TEX_UNIFIED_1D_S32_F32_I; + case NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R: + return NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_I; + case NVPTX::TEX_UNIFIED_1D_U32_S32_R: + return NVPTX::TEX_UNIFIED_1D_U32_S32_I; + case NVPTX::TEX_UNIFIED_1D_U32_F32_R: + return NVPTX::TEX_UNIFIED_1D_U32_F32_I; + case NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R: + return NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_I; + case NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R: + return NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_I; + case NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R: + return NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_I; + case NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R: + return NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_I; + case NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R: + return NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_I; + case NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R: + return NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_I; + case NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R: + return NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_I; + case NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R: + return NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_I; + case NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R: + return NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_I; + case NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R: + return NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_I; + case NVPTX::TEX_UNIFIED_2D_F32_S32_R: + return NVPTX::TEX_UNIFIED_2D_F32_S32_I; + case NVPTX::TEX_UNIFIED_2D_F32_F32_R: + return NVPTX::TEX_UNIFIED_2D_F32_F32_I; + case NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R: + return NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_I; + case NVPTX::TEX_UNIFIED_2D_S32_S32_R: + return NVPTX::TEX_UNIFIED_2D_S32_S32_I; + case NVPTX::TEX_UNIFIED_2D_S32_F32_R: + return NVPTX::TEX_UNIFIED_2D_S32_F32_I; + case NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R: + return NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_I; + case NVPTX::TEX_UNIFIED_2D_U32_S32_R: + return NVPTX::TEX_UNIFIED_2D_U32_S32_I; + case NVPTX::TEX_UNIFIED_2D_U32_F32_R: + return NVPTX::TEX_UNIFIED_2D_U32_F32_I; + case NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R: + return NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_I; + case NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R: + return NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_I; + case NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R: + return NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_I; + case NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R: + return NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_I; + case NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R: + return NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_I; + case NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R: + return NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_I; + case NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R: + return NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_I; + case NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R: + return NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_I; + case NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R: + return NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_I; + case NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R: + return NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_I; + case NVPTX::TEX_UNIFIED_3D_F32_S32_R: + return NVPTX::TEX_UNIFIED_3D_F32_S32_I; + case NVPTX::TEX_UNIFIED_3D_F32_F32_R: + return NVPTX::TEX_UNIFIED_3D_F32_F32_I; + case NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R: + return NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_I; + case NVPTX::TEX_UNIFIED_3D_S32_S32_R: + return NVPTX::TEX_UNIFIED_3D_S32_S32_I; + case NVPTX::TEX_UNIFIED_3D_S32_F32_R: + return NVPTX::TEX_UNIFIED_3D_S32_F32_I; + case NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R: + return NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_I; + case NVPTX::TEX_UNIFIED_3D_U32_S32_R: + return NVPTX::TEX_UNIFIED_3D_U32_S32_I; + case NVPTX::TEX_UNIFIED_3D_U32_F32_R: + return NVPTX::TEX_UNIFIED_3D_U32_F32_I; + case NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R: + return NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_I; + case NVPTX::TEX_UNIFIED_CUBE_F32_F32_R: + return NVPTX::TEX_UNIFIED_CUBE_F32_F32_I; + case NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_CUBE_S32_F32_R: + return NVPTX::TEX_UNIFIED_CUBE_S32_F32_I; + case NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_CUBE_U32_F32_R: + return NVPTX::TEX_UNIFIED_CUBE_U32_F32_I; + case NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R: + return NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_I; + case NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R: + return NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_I; + case NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_I; + case NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R: + return NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_I; + case NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R: + return NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_I; + case NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R: + return NVPTX::TLD4_UNIFIED_R_2D_F32_F32_I; + case NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R: + return NVPTX::TLD4_UNIFIED_G_2D_F32_F32_I; + case NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R: + return NVPTX::TLD4_UNIFIED_B_2D_F32_F32_I; + case NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R: + return NVPTX::TLD4_UNIFIED_A_2D_F32_F32_I; + case NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R: + return NVPTX::TLD4_UNIFIED_R_2D_S32_F32_I; + case NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R: + return NVPTX::TLD4_UNIFIED_G_2D_S32_F32_I; + case NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R: + return NVPTX::TLD4_UNIFIED_B_2D_S32_F32_I; + case NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R: + return NVPTX::TLD4_UNIFIED_A_2D_S32_F32_I; + case NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R: + return NVPTX::TLD4_UNIFIED_R_2D_U32_F32_I; + case NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R: + return NVPTX::TLD4_UNIFIED_G_2D_U32_F32_I; + case NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R: + return NVPTX::TLD4_UNIFIED_B_2D_U32_F32_I; + case NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R: + return NVPTX::TLD4_UNIFIED_A_2D_U32_F32_I; + default: + llvm_unreachable("Unhandled TEX opcode"); + }; +} + +static unsigned samplerRegisterToIndexOpcode(unsigned RegOC) { + switch (RegOC) { + case NVPTX::TEX_1D_F32_S32_RR: + return NVPTX::TEX_1D_F32_S32_RI; + case NVPTX::TEX_1D_F32_S32_IR: + return NVPTX::TEX_1D_F32_S32_II; + case NVPTX::TEX_1D_F32_F32_RR: + return NVPTX::TEX_1D_F32_F32_RI; + case NVPTX::TEX_1D_F32_F32_IR: + return NVPTX::TEX_1D_F32_F32_II; + case NVPTX::TEX_1D_F32_F32_LEVEL_RR: + return NVPTX::TEX_1D_F32_F32_LEVEL_RI; + case NVPTX::TEX_1D_F32_F32_LEVEL_IR: + return NVPTX::TEX_1D_F32_F32_LEVEL_II; + case NVPTX::TEX_1D_F32_F32_GRAD_RR: + return NVPTX::TEX_1D_F32_F32_GRAD_RI; + case NVPTX::TEX_1D_F32_F32_GRAD_IR: + return NVPTX::TEX_1D_F32_F32_GRAD_II; + case NVPTX::TEX_1D_S32_S32_RR: + return NVPTX::TEX_1D_S32_S32_RI; + case NVPTX::TEX_1D_S32_S32_IR: + return NVPTX::TEX_1D_S32_S32_II; + case NVPTX::TEX_1D_S32_F32_RR: + return NVPTX::TEX_1D_S32_F32_RI; + case NVPTX::TEX_1D_S32_F32_IR: + return NVPTX::TEX_1D_S32_F32_II; + case NVPTX::TEX_1D_S32_F32_LEVEL_RR: + return NVPTX::TEX_1D_S32_F32_LEVEL_RI; + case NVPTX::TEX_1D_S32_F32_LEVEL_IR: + return NVPTX::TEX_1D_S32_F32_LEVEL_II; + case NVPTX::TEX_1D_S32_F32_GRAD_RR: + return NVPTX::TEX_1D_S32_F32_GRAD_RI; + case NVPTX::TEX_1D_S32_F32_GRAD_IR: + return NVPTX::TEX_1D_S32_F32_GRAD_II; + case NVPTX::TEX_1D_U32_S32_RR: + return NVPTX::TEX_1D_U32_S32_RI; + case NVPTX::TEX_1D_U32_S32_IR: + return NVPTX::TEX_1D_U32_S32_II; + case NVPTX::TEX_1D_U32_F32_RR: + return NVPTX::TEX_1D_U32_F32_RI; + case NVPTX::TEX_1D_U32_F32_IR: + return NVPTX::TEX_1D_U32_F32_II; + case NVPTX::TEX_1D_U32_F32_LEVEL_RR: + return NVPTX::TEX_1D_U32_F32_LEVEL_RI; + case NVPTX::TEX_1D_U32_F32_LEVEL_IR: + return NVPTX::TEX_1D_U32_F32_LEVEL_II; + case NVPTX::TEX_1D_U32_F32_GRAD_RR: + return NVPTX::TEX_1D_U32_F32_GRAD_RI; + case NVPTX::TEX_1D_U32_F32_GRAD_IR: + return NVPTX::TEX_1D_U32_F32_GRAD_II; + case NVPTX::TEX_1D_ARRAY_F32_S32_RR: + return NVPTX::TEX_1D_ARRAY_F32_S32_RI; + case NVPTX::TEX_1D_ARRAY_F32_S32_IR: + return NVPTX::TEX_1D_ARRAY_F32_S32_II; + case NVPTX::TEX_1D_ARRAY_F32_F32_RR: + return NVPTX::TEX_1D_ARRAY_F32_F32_RI; + case NVPTX::TEX_1D_ARRAY_F32_F32_IR: + return NVPTX::TEX_1D_ARRAY_F32_F32_II; + case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR: + return NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RI; + case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_IR: + return NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_II; + case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR: + return NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RI; + case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_IR: + return NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_II; + case NVPTX::TEX_1D_ARRAY_S32_S32_RR: + return NVPTX::TEX_1D_ARRAY_S32_S32_RI; + case NVPTX::TEX_1D_ARRAY_S32_S32_IR: + return NVPTX::TEX_1D_ARRAY_S32_S32_II; + case NVPTX::TEX_1D_ARRAY_S32_F32_RR: + return NVPTX::TEX_1D_ARRAY_S32_F32_RI; + case NVPTX::TEX_1D_ARRAY_S32_F32_IR: + return NVPTX::TEX_1D_ARRAY_S32_F32_II; + case NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR: + return NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RI; + case NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_IR: + return NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_II; + case NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR: + return NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RI; + case NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_IR: + return NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_II; + case NVPTX::TEX_1D_ARRAY_U32_S32_RR: + return NVPTX::TEX_1D_ARRAY_U32_S32_RI; + case NVPTX::TEX_1D_ARRAY_U32_S32_IR: + return NVPTX::TEX_1D_ARRAY_U32_S32_II; + case NVPTX::TEX_1D_ARRAY_U32_F32_RR: + return NVPTX::TEX_1D_ARRAY_U32_F32_RI; + case NVPTX::TEX_1D_ARRAY_U32_F32_IR: + return NVPTX::TEX_1D_ARRAY_U32_F32_II; + case NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR: + return NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RI; + case NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_IR: + return NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_II; + case NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR: + return NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RI; + case NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_IR: + return NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_II; + case NVPTX::TEX_2D_F32_S32_RR: + return NVPTX::TEX_2D_F32_S32_RI; + case NVPTX::TEX_2D_F32_S32_IR: + return NVPTX::TEX_2D_F32_S32_II; + case NVPTX::TEX_2D_F32_F32_RR: + return NVPTX::TEX_2D_F32_F32_RI; + case NVPTX::TEX_2D_F32_F32_IR: + return NVPTX::TEX_2D_F32_F32_II; + case NVPTX::TEX_2D_F32_F32_LEVEL_RR: + return NVPTX::TEX_2D_F32_F32_LEVEL_RI; + case NVPTX::TEX_2D_F32_F32_LEVEL_IR: + return NVPTX::TEX_2D_F32_F32_LEVEL_II; + case NVPTX::TEX_2D_F32_F32_GRAD_RR: + return NVPTX::TEX_2D_F32_F32_GRAD_RI; + case NVPTX::TEX_2D_F32_F32_GRAD_IR: + return NVPTX::TEX_2D_F32_F32_GRAD_II; + case NVPTX::TEX_2D_S32_S32_RR: + return NVPTX::TEX_2D_S32_S32_RI; + case NVPTX::TEX_2D_S32_S32_IR: + return NVPTX::TEX_2D_S32_S32_II; + case NVPTX::TEX_2D_S32_F32_RR: + return NVPTX::TEX_2D_S32_F32_RI; + case NVPTX::TEX_2D_S32_F32_IR: + return NVPTX::TEX_2D_S32_F32_II; + case NVPTX::TEX_2D_S32_F32_LEVEL_RR: + return NVPTX::TEX_2D_S32_F32_LEVEL_RI; + case NVPTX::TEX_2D_S32_F32_LEVEL_IR: + return NVPTX::TEX_2D_S32_F32_LEVEL_II; + case NVPTX::TEX_2D_S32_F32_GRAD_RR: + return NVPTX::TEX_2D_S32_F32_GRAD_RI; + case NVPTX::TEX_2D_S32_F32_GRAD_IR: + return NVPTX::TEX_2D_S32_F32_GRAD_II; + case NVPTX::TEX_2D_U32_S32_RR: + return NVPTX::TEX_2D_U32_S32_RI; + case NVPTX::TEX_2D_U32_S32_IR: + return NVPTX::TEX_2D_U32_S32_II; + case NVPTX::TEX_2D_U32_F32_RR: + return NVPTX::TEX_2D_U32_F32_RI; + case NVPTX::TEX_2D_U32_F32_IR: + return NVPTX::TEX_2D_U32_F32_II; + case NVPTX::TEX_2D_U32_F32_LEVEL_RR: + return NVPTX::TEX_2D_U32_F32_LEVEL_RI; + case NVPTX::TEX_2D_U32_F32_LEVEL_IR: + return NVPTX::TEX_2D_U32_F32_LEVEL_II; + case NVPTX::TEX_2D_U32_F32_GRAD_RR: + return NVPTX::TEX_2D_U32_F32_GRAD_RI; + case NVPTX::TEX_2D_U32_F32_GRAD_IR: + return NVPTX::TEX_2D_U32_F32_GRAD_II; + case NVPTX::TEX_2D_ARRAY_F32_S32_RR: + return NVPTX::TEX_2D_ARRAY_F32_S32_RI; + case NVPTX::TEX_2D_ARRAY_F32_S32_IR: + return NVPTX::TEX_2D_ARRAY_F32_S32_II; + case NVPTX::TEX_2D_ARRAY_F32_F32_RR: + return NVPTX::TEX_2D_ARRAY_F32_F32_RI; + case NVPTX::TEX_2D_ARRAY_F32_F32_IR: + return NVPTX::TEX_2D_ARRAY_F32_F32_II; + case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR: + return NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RI; + case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_IR: + return NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_II; + case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR: + return NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RI; + case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_IR: + return NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_II; + case NVPTX::TEX_2D_ARRAY_S32_S32_RR: + return NVPTX::TEX_2D_ARRAY_S32_S32_RI; + case NVPTX::TEX_2D_ARRAY_S32_S32_IR: + return NVPTX::TEX_2D_ARRAY_S32_S32_II; + case NVPTX::TEX_2D_ARRAY_S32_F32_RR: + return NVPTX::TEX_2D_ARRAY_S32_F32_RI; + case NVPTX::TEX_2D_ARRAY_S32_F32_IR: + return NVPTX::TEX_2D_ARRAY_S32_F32_II; + case NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR: + return NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RI; + case NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_IR: + return NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_II; + case NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR: + return NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RI; + case NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_IR: + return NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_II; + case NVPTX::TEX_2D_ARRAY_U32_S32_RR: + return NVPTX::TEX_2D_ARRAY_U32_S32_RI; + case NVPTX::TEX_2D_ARRAY_U32_S32_IR: + return NVPTX::TEX_2D_ARRAY_U32_S32_II; + case NVPTX::TEX_2D_ARRAY_U32_F32_RR: + return NVPTX::TEX_2D_ARRAY_U32_F32_RI; + case NVPTX::TEX_2D_ARRAY_U32_F32_IR: + return NVPTX::TEX_2D_ARRAY_U32_F32_II; + case NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR: + return NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RI; + case NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_IR: + return NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_II; + case NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR: + return NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RI; + case NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_IR: + return NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_II; + case NVPTX::TEX_3D_F32_S32_RR: + return NVPTX::TEX_3D_F32_S32_RI; + case NVPTX::TEX_3D_F32_S32_IR: + return NVPTX::TEX_3D_F32_S32_II; + case NVPTX::TEX_3D_F32_F32_RR: + return NVPTX::TEX_3D_F32_F32_RI; + case NVPTX::TEX_3D_F32_F32_IR: + return NVPTX::TEX_3D_F32_F32_II; + case NVPTX::TEX_3D_F32_F32_LEVEL_RR: + return NVPTX::TEX_3D_F32_F32_LEVEL_RI; + case NVPTX::TEX_3D_F32_F32_LEVEL_IR: + return NVPTX::TEX_3D_F32_F32_LEVEL_II; + case NVPTX::TEX_3D_F32_F32_GRAD_RR: + return NVPTX::TEX_3D_F32_F32_GRAD_RI; + case NVPTX::TEX_3D_F32_F32_GRAD_IR: + return NVPTX::TEX_3D_F32_F32_GRAD_II; + case NVPTX::TEX_3D_S32_S32_RR: + return NVPTX::TEX_3D_S32_S32_RI; + case NVPTX::TEX_3D_S32_S32_IR: + return NVPTX::TEX_3D_S32_S32_II; + case NVPTX::TEX_3D_S32_F32_RR: + return NVPTX::TEX_3D_S32_F32_RI; + case NVPTX::TEX_3D_S32_F32_IR: + return NVPTX::TEX_3D_S32_F32_II; + case NVPTX::TEX_3D_S32_F32_LEVEL_RR: + return NVPTX::TEX_3D_S32_F32_LEVEL_RI; + case NVPTX::TEX_3D_S32_F32_LEVEL_IR: + return NVPTX::TEX_3D_S32_F32_LEVEL_II; + case NVPTX::TEX_3D_S32_F32_GRAD_RR: + return NVPTX::TEX_3D_S32_F32_GRAD_RI; + case NVPTX::TEX_3D_S32_F32_GRAD_IR: + return NVPTX::TEX_3D_S32_F32_GRAD_II; + case NVPTX::TEX_3D_U32_S32_RR: + return NVPTX::TEX_3D_U32_S32_RI; + case NVPTX::TEX_3D_U32_S32_IR: + return NVPTX::TEX_3D_U32_S32_II; + case NVPTX::TEX_3D_U32_F32_RR: + return NVPTX::TEX_3D_U32_F32_RI; + case NVPTX::TEX_3D_U32_F32_IR: + return NVPTX::TEX_3D_U32_F32_II; + case NVPTX::TEX_3D_U32_F32_LEVEL_RR: + return NVPTX::TEX_3D_U32_F32_LEVEL_RI; + case NVPTX::TEX_3D_U32_F32_LEVEL_IR: + return NVPTX::TEX_3D_U32_F32_LEVEL_II; + case NVPTX::TEX_3D_U32_F32_GRAD_RR: + return NVPTX::TEX_3D_U32_F32_GRAD_RI; + case NVPTX::TEX_3D_U32_F32_GRAD_IR: + return NVPTX::TEX_3D_U32_F32_GRAD_II; + case NVPTX::TEX_CUBE_F32_F32_RR: + return NVPTX::TEX_CUBE_F32_F32_RI; + case NVPTX::TEX_CUBE_F32_F32_IR: + return NVPTX::TEX_CUBE_F32_F32_II; + case NVPTX::TEX_CUBE_F32_F32_LEVEL_RR: + return NVPTX::TEX_CUBE_F32_F32_LEVEL_RI; + case NVPTX::TEX_CUBE_F32_F32_LEVEL_IR: + return NVPTX::TEX_CUBE_F32_F32_LEVEL_II; + case NVPTX::TEX_CUBE_S32_F32_RR: + return NVPTX::TEX_CUBE_S32_F32_RI; + case NVPTX::TEX_CUBE_S32_F32_IR: + return NVPTX::TEX_CUBE_S32_F32_II; + case NVPTX::TEX_CUBE_S32_F32_LEVEL_RR: + return NVPTX::TEX_CUBE_S32_F32_LEVEL_RI; + case NVPTX::TEX_CUBE_S32_F32_LEVEL_IR: + return NVPTX::TEX_CUBE_S32_F32_LEVEL_II; + case NVPTX::TEX_CUBE_U32_F32_RR: + return NVPTX::TEX_CUBE_U32_F32_RI; + case NVPTX::TEX_CUBE_U32_F32_IR: + return NVPTX::TEX_CUBE_U32_F32_II; + case NVPTX::TEX_CUBE_U32_F32_LEVEL_RR: + return NVPTX::TEX_CUBE_U32_F32_LEVEL_RI; + case NVPTX::TEX_CUBE_U32_F32_LEVEL_IR: + return NVPTX::TEX_CUBE_U32_F32_LEVEL_II; + case NVPTX::TEX_CUBE_ARRAY_F32_F32_RR: + return NVPTX::TEX_CUBE_ARRAY_F32_F32_RI; + case NVPTX::TEX_CUBE_ARRAY_F32_F32_IR: + return NVPTX::TEX_CUBE_ARRAY_F32_F32_II; + case NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR: + return NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RI; + case NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_IR: + return NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_II; + case NVPTX::TEX_CUBE_ARRAY_S32_F32_RR: + return NVPTX::TEX_CUBE_ARRAY_S32_F32_RI; + case NVPTX::TEX_CUBE_ARRAY_S32_F32_IR: + return NVPTX::TEX_CUBE_ARRAY_S32_F32_II; + case NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR: + return NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RI; + case NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_IR: + return NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_II; + case NVPTX::TEX_CUBE_ARRAY_U32_F32_RR: + return NVPTX::TEX_CUBE_ARRAY_U32_F32_RI; + case NVPTX::TEX_CUBE_ARRAY_U32_F32_IR: + return NVPTX::TEX_CUBE_ARRAY_U32_F32_II; + case NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR: + return NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RI; + case NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_IR: + return NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_II; + case NVPTX::TLD4_R_2D_F32_F32_RR: + return NVPTX::TLD4_R_2D_F32_F32_RI; + case NVPTX::TLD4_R_2D_F32_F32_IR: + return NVPTX::TLD4_R_2D_F32_F32_II; + case NVPTX::TLD4_G_2D_F32_F32_RR: + return NVPTX::TLD4_G_2D_F32_F32_RI; + case NVPTX::TLD4_G_2D_F32_F32_IR: + return NVPTX::TLD4_G_2D_F32_F32_II; + case NVPTX::TLD4_B_2D_F32_F32_RR: + return NVPTX::TLD4_B_2D_F32_F32_RI; + case NVPTX::TLD4_B_2D_F32_F32_IR: + return NVPTX::TLD4_B_2D_F32_F32_II; + case NVPTX::TLD4_A_2D_F32_F32_RR: + return NVPTX::TLD4_A_2D_F32_F32_RI; + case NVPTX::TLD4_A_2D_F32_F32_IR: + return NVPTX::TLD4_A_2D_F32_F32_II; + case NVPTX::TLD4_R_2D_S32_F32_RR: + return NVPTX::TLD4_R_2D_S32_F32_RI; + case NVPTX::TLD4_R_2D_S32_F32_IR: + return NVPTX::TLD4_R_2D_S32_F32_II; + case NVPTX::TLD4_G_2D_S32_F32_RR: + return NVPTX::TLD4_G_2D_S32_F32_RI; + case NVPTX::TLD4_G_2D_S32_F32_IR: + return NVPTX::TLD4_G_2D_S32_F32_II; + case NVPTX::TLD4_B_2D_S32_F32_RR: + return NVPTX::TLD4_B_2D_S32_F32_RI; + case NVPTX::TLD4_B_2D_S32_F32_IR: + return NVPTX::TLD4_B_2D_S32_F32_II; + case NVPTX::TLD4_A_2D_S32_F32_RR: + return NVPTX::TLD4_A_2D_S32_F32_RI; + case NVPTX::TLD4_A_2D_S32_F32_IR: + return NVPTX::TLD4_A_2D_S32_F32_II; + case NVPTX::TLD4_R_2D_U32_F32_RR: + return NVPTX::TLD4_R_2D_U32_F32_RI; + case NVPTX::TLD4_R_2D_U32_F32_IR: + return NVPTX::TLD4_R_2D_U32_F32_II; + case NVPTX::TLD4_G_2D_U32_F32_RR: + return NVPTX::TLD4_G_2D_U32_F32_RI; + case NVPTX::TLD4_G_2D_U32_F32_IR: + return NVPTX::TLD4_G_2D_U32_F32_II; + case NVPTX::TLD4_B_2D_U32_F32_RR: + return NVPTX::TLD4_B_2D_U32_F32_RI; + case NVPTX::TLD4_B_2D_U32_F32_IR: + return NVPTX::TLD4_B_2D_U32_F32_II; + case NVPTX::TLD4_A_2D_U32_F32_RR: + return NVPTX::TLD4_A_2D_U32_F32_RI; + case NVPTX::TLD4_A_2D_U32_F32_IR: + return NVPTX::TLD4_A_2D_U32_F32_II; + default: + llvm_unreachable("Unhandled TEX opcode"); + }; +} + +static unsigned queryRegisterToIndexOpcode(unsigned RegOC) { + switch (RegOC) { + case NVPTX::TXQ_CHANNEL_ORDER_R: + return NVPTX::TXQ_CHANNEL_ORDER_I; + case NVPTX::TXQ_CHANNEL_DATA_TYPE_R: + return NVPTX::TXQ_CHANNEL_DATA_TYPE_I; + case NVPTX::TXQ_WIDTH_R: + return NVPTX::TXQ_WIDTH_I; + case NVPTX::TXQ_HEIGHT_R: + return NVPTX::TXQ_HEIGHT_I; + case NVPTX::TXQ_DEPTH_R: + return NVPTX::TXQ_DEPTH_I; + case NVPTX::TXQ_ARRAY_SIZE_R: + return NVPTX::TXQ_ARRAY_SIZE_I; + case NVPTX::TXQ_NUM_SAMPLES_R: + return NVPTX::TXQ_NUM_SAMPLES_I; + case NVPTX::TXQ_NUM_MIPMAP_LEVELS_R: + return NVPTX::TXQ_NUM_MIPMAP_LEVELS_I; + case NVPTX::SUQ_CHANNEL_ORDER_R: + return NVPTX::SUQ_CHANNEL_ORDER_I; + case NVPTX::SUQ_CHANNEL_DATA_TYPE_R: + return NVPTX::SUQ_CHANNEL_DATA_TYPE_I; + case NVPTX::SUQ_WIDTH_R: + return NVPTX::SUQ_WIDTH_I; + case NVPTX::SUQ_HEIGHT_R: + return NVPTX::SUQ_HEIGHT_I; + case NVPTX::SUQ_DEPTH_R: + return NVPTX::SUQ_DEPTH_I; + case NVPTX::SUQ_ARRAY_SIZE_R: + return NVPTX::SUQ_ARRAY_SIZE_I; + default: + llvm_unreachable("Unhandled TXQ/SUQ opcode"); + }; +} + bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) { MachineFunction &MF = *MI.getParent()->getParent(); const MCInstrDesc &MCID = MI.getDesc(); + const NVPTXInstrInfo *TII = MF.getSubtarget().getInstrInfo(); if (MCID.TSFlags & NVPTXII::IsTexFlag) { // This is a texture fetch, so operand 4 is a texref and operand 5 is // a samplerref MachineOperand &TexHandle = MI.getOperand(4); - replaceImageHandle(TexHandle, MF); + if (replaceImageHandle(TexHandle, MF)) + MI.setDesc(TII->get(texRegisterToIndexOpcode(MI.getOpcode()))); if (!(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) { MachineOperand &SampHandle = MI.getOperand(5); - replaceImageHandle(SampHandle, MF); + if (replaceImageHandle(SampHandle, MF)) + MI.setDesc(TII->get(samplerRegisterToIndexOpcode(MI.getOpcode()))); } return true; @@ -99,21 +1755,24 @@ // For a surface load of vector size N, the Nth operand will be the surfref MachineOperand &SurfHandle = MI.getOperand(VecSize); - replaceImageHandle(SurfHandle, MF); + if (replaceImageHandle(SurfHandle, MF)) + MI.setDesc(TII->get(suldRegisterToIndexOpcode(MI.getOpcode()))); return true; } else if (MCID.TSFlags & NVPTXII::IsSustFlag) { // This is a surface store, so operand 0 is a surfref MachineOperand &SurfHandle = MI.getOperand(0); - replaceImageHandle(SurfHandle, MF); + if (replaceImageHandle(SurfHandle, MF)) + MI.setDesc(TII->get(sustRegisterToIndexOpcode(MI.getOpcode()))); return true; } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) { // This is a query, so operand 1 is a surfref/texref MachineOperand &Handle = MI.getOperand(1); - replaceImageHandle(Handle, MF); + if (replaceImageHandle(Handle, MF)) + MI.setDesc(TII->get(queryRegisterToIndexOpcode(MI.getOpcode()))); return true; } @@ -121,12 +1780,14 @@ return false; } -void NVPTXReplaceImageHandles:: -replaceImageHandle(MachineOperand &Op, MachineFunction &MF) { +bool NVPTXReplaceImageHandles::replaceImageHandle(MachineOperand &Op, + MachineFunction &MF) { unsigned Idx; if (findIndexForHandle(Op, MF, Idx)) { Op.ChangeToImmediate(Idx); + return true; } + return false; } bool NVPTXReplaceImageHandles:: diff --git a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll --- a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20 -; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30 +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 +; RUN: llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 target triple = "nvptx-unknown-cuda" diff --git a/llvm/test/CodeGen/NVPTX/surf-read.ll b/llvm/test/CodeGen/NVPTX/surf-read.ll --- a/llvm/test/CodeGen/NVPTX/surf-read.ll +++ b/llvm/test/CodeGen/NVPTX/surf-read.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s target triple = "nvptx-unknown-nvcl" diff --git a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll --- a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20 -; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30 +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 +; RUN: llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 target triple = "nvptx-unknown-cuda" diff --git a/llvm/test/CodeGen/NVPTX/surf-write.ll b/llvm/test/CodeGen/NVPTX/surf-write.ll --- a/llvm/test/CodeGen/NVPTX/surf-write.ll +++ b/llvm/test/CodeGen/NVPTX/surf-write.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s target triple = "nvptx-unknown-nvcl" diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll --- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20 -; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30 +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 +; RUN: llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 target triple = "nvptx-unknown-cuda" diff --git a/llvm/test/CodeGen/NVPTX/tex-read.ll b/llvm/test/CodeGen/NVPTX/tex-read.ll --- a/llvm/test/CodeGen/NVPTX/tex-read.ll +++ b/llvm/test/CodeGen/NVPTX/tex-read.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s target triple = "nvptx-unknown-nvcl" diff --git a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll --- a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll +++ b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20 -; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30 +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 +; RUN: llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 target triple = "nvptx-unknown-cuda"