Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -1,188 +1,121 @@ -//===-- AMDGPU.td - AMDGPU Tablegen files ------------------*- tablegen -*-===// +//===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -//===----------------------------------------------------------------------===// +//===------------------------------------------------------------===// include "llvm/Target/Target.td" -//===----------------------------------------------------------------------===// -// Subtarget Features -//===----------------------------------------------------------------------===// - -// Debugging Features - -def FeatureDumpCode : SubtargetFeature <"DumpCode", - "DumpCode", - "true", - "Dump MachineInstrs in the CodeEmitter">; - -def FeatureDumpCodeLower : SubtargetFeature <"dumpcode", - "DumpCode", - "true", - "Dump MachineInstrs in the CodeEmitter">; - -def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer", - "EnableIRStructurizer", - "false", - "Disable IR Structurizer">; - -def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca", - "EnablePromoteAlloca", - "true", - "Enable promote alloca pass">; - -// Target features - -def FeatureIfCvt : SubtargetFeature <"disable-ifcvt", - "EnableIfCvt", - "false", - "Disable the if conversion pass">; +//===------------------------------------------------------------===// +// Subtarget Features (device properties) +//===------------------------------------------------------------===// def FeatureFP64 : SubtargetFeature<"fp64", - "FP64", - "true", - "Enable double precision operations">; - -def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals", - "FP64Denormals", - "true", - "Enable double precision denormal handling", - [FeatureFP64]>; + "FP64", + "true", + "Enable double precision operations" +>; def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf", - "FastFMAF32", - "true", - "Assuming f32 fma is at least as fast as mul + add", - []>; + "FastFMAF32", + "true", + "Assuming f32 fma is at least as fast as mul + add" +>; def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops", - "HalfRate64Ops", - "true", - "Most fp64 instructions are half rate instead of quarter", - []>; - -// Some instructions do not support denormals despite this flag. Using -// fp32 denormals also causes instructions to run at the double -// precision rate for the device. 
-def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals", - "FP32Denormals", - "true", - "Enable single precision denormal handling">; + "HalfRate64Ops", + "true", + "Most fp64 instructions are half rate instead of quarter" +>; def Feature64BitPtr : SubtargetFeature<"64BitPtr", - "Is64bit", - "true", - "Specify if 64-bit addressing should be used">; + "Is64bit", + "true", + "Specify if 64-bit addressing should be used" +>; def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst", - "R600ALUInst", - "false", - "Older version of ALU instructions encoding">; + "R600ALUInst", + "false", + "Older version of ALU instructions encoding" +>; def FeatureVertexCache : SubtargetFeature<"HasVertexCache", - "HasVertexCache", - "true", - "Specify use of dedicated vertex cache">; + "HasVertexCache", + "true", + "Specify use of dedicated vertex cache" +>; def FeatureCaymanISA : SubtargetFeature<"caymanISA", - "CaymanISA", - "true", - "Use Cayman ISA">; + "CaymanISA", + "true", + "Use Cayman ISA" +>; def FeatureCFALUBug : SubtargetFeature<"cfalubug", - "CFALUBug", - "true", - "GPU has CF_ALU bug">; - -// XXX - This should probably be removed once enabled by default -def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt", - "EnableLoadStoreOpt", - "true", - "Enable SI load/store optimizer pass">; - -// Performance debugging feature. Allow using DS instruction immediate -// offsets even if the base pointer can't be proven to be base. On SI, -// base pointer values that won't give the same result as a 16-bit add -// are not safe to fold, but this will override the conservative test -// for the base pointer. -def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <"unsafe-ds-offset-folding", - "EnableUnsafeDSOffsetFolding", - "true", - "Force using DS instruction immediate offsets on SI">; - -def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global", - "FlatForGlobal", - "true", - "Force to generate flat instruction for global">; + "CFALUBug", + "true", + "GPU has CF_ALU bug" +>; def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space", - "FlatAddressSpace", - "true", - "Support flat address space">; + "FlatAddressSpace", + "true", + "Support flat address space" +>; def FeatureXNACK : SubtargetFeature<"xnack", - "EnableXNACK", - "true", - "Enable XNACK support">; - -def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling", - "EnableVGPRSpilling", - "true", - "Enable spilling of VGPRs to scratch memory">; + "EnableXNACK", + "true", + "Enable XNACK support" +>; def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug", - "SGPRInitBug", - "true", - "VI SGPR initilization bug requiring a fixed SGPR allocation size">; - -def FeatureEnableHugeScratchBuffer : SubtargetFeature<"huge-scratch-buffer", - "EnableHugeScratchBuffer", - "true", - "Enable scratch buffer sizes greater than 128 GB">; - -def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler", - "EnableSIScheduler", - "true", - "Enable SI Machine Scheduler">; + "SGPRInitBug", + "true", + "VI SGPR initilization bug requiring a fixed SGPR allocation size" +>; class SubtargetFeatureFetchLimit : SubtargetFeature <"fetch"#Value, - "TexVTXClauseSize", - Value, - "Limit the maximum number of fetches in a clause to "#Value>; + "TexVTXClauseSize", + Value, + "Limit the maximum number of fetches in a clause to "#Value +>; def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">; def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">; class SubtargetFeatureWavefrontSize : SubtargetFeature< - "wavefrontsize"#Value, - 
"WavefrontSize", - !cast(Value), - "The number of threads per wavefront">; + "wavefrontsize"#Value, + "WavefrontSize", + !cast(Value), + "The number of threads per wavefront" +>; def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>; def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>; def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>; class SubtargetFeatureLDSBankCount : SubtargetFeature < - "ldsbankcount"#Value, - "LDSBankCount", - !cast(Value), - "The number of LDS banks per compute unit.">; + "ldsbankcount"#Value, + "LDSBankCount", + !cast(Value), + "The number of LDS banks per compute unit." +>; def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>; def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>; class SubtargetFeatureISAVersion : SubtargetFeature < - "isaver"#Major#"."#Minor#"."#Stepping, - "IsaVersion", - "ISAVersion"#Major#"_"#Minor#"_"#Stepping, - "Instruction set version number" + "isaver"#Major#"."#Minor#"."#Stepping, + "IsaVersion", + "ISAVersion"#Major#"_"#Minor#"_"#Stepping, + "Instruction set version number" >; def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0>; @@ -192,36 +125,135 @@ def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3>; class SubtargetFeatureLocalMemorySize : SubtargetFeature< - "localmemorysize"#Value, - "LocalMemorySize", - !cast(Value), - "The size of local memory in bytes">; + "localmemorysize"#Value, + "LocalMemorySize", + !cast(Value), + "The size of local memory in bytes" +>; def FeatureGCN : SubtargetFeature<"gcn", - "IsGCN", - "true", - "GCN or newer GPU">; + "IsGCN", + "true", + "GCN or newer GPU" +>; def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding", - "GCN1Encoding", - "true", - "Encoding format for SI and CI">; + "GCN1Encoding", + "true", + "Encoding format for SI and CI" +>; def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding", - "GCN3Encoding", - "true", - "Encoding format for VI">; + "GCN3Encoding", + "true", + "Encoding format for VI" +>; def FeatureCIInsts : SubtargetFeature<"ci-insts", - "CIInsts", - "true", - "Additional intstructions for CI+">; + "CIInsts", + "true", + "Additional intstructions for CI+" +>; + +//===------------------------------------------------------------===// +// Subtarget Features (options and debugging) +//===------------------------------------------------------------===// + +// Some instructions do not support denormals despite this flag. Using +// fp32 denormals also causes instructions to run at the double +// precision rate for the device. 
+def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals", + "FP32Denormals", + "true", + "Enable single precision denormal handling" +>; + +def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals", + "FP64Denormals", + "true", + "Enable double precision denormal handling", + [FeatureFP64] +>; + +def FeatureEnableHugeScratchBuffer : SubtargetFeature< + "huge-scratch-buffer", + "EnableHugeScratchBuffer", + "true", + "Enable scratch buffer sizes greater than 128 GB" +>; + +def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling", + "EnableVGPRSpilling", + "true", + "Enable spilling of VGPRs to scratch memory" +>; + +def FeatureDumpCode : SubtargetFeature <"DumpCode", + "DumpCode", + "true", + "Dump MachineInstrs in the CodeEmitter" +>; + +def FeatureDumpCodeLower : SubtargetFeature <"dumpcode", + "DumpCode", + "true", + "Dump MachineInstrs in the CodeEmitter" +>; + +def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer", + "EnableIRStructurizer", + "false", + "Disable IR Structurizer" +>; + +def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca", + "EnablePromoteAlloca", + "true", + "Enable promote alloca pass" +>; + +// XXX - This should probably be removed once enabled by default +def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt", + "EnableLoadStoreOpt", + "true", + "Enable SI load/store optimizer pass" +>; + +// Performance debugging feature. Allow using DS instruction immediate +// offsets even if the base pointer can't be proven to be base. On SI, +// base pointer values that won't give the same result as a 16-bit add +// are not safe to fold, but this will override the conservative test +// for the base pointer. +def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature < + "unsafe-ds-offset-folding", + "EnableUnsafeDSOffsetFolding", + "true", + "Force using DS instruction immediate offsets on SI" +>; + +def FeatureIfCvt : SubtargetFeature <"disable-ifcvt", + "EnableIfCvt", + "false", + "Disable the if conversion pass" +>; + +def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler", + "EnableSIScheduler", + "true", + "Enable SI Machine Scheduler" +>; + +def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global", + "FlatForGlobal", + "true", + "Force to generate flat instruction for global" +>; // Dummy feature used to disable assembler instructions. 
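A note on how the option and debugging features above are consumed (an illustrative aside, not part of the patch): the four operands of every SubtargetFeature are, in order, the name accepted by -mattr and by the "target-features" function attribute, the AMDGPUSubtarget member the feature initializes, the value written into that member when the feature is requested, and the help text. A hypothetical feature with the same shape:

// Hypothetical example only: neither the feature name nor the subtarget
// member below exists in the backend; the operands are labeled to show
// their roles.
def FeatureExampleOption : SubtargetFeature<"example-option", // -mattr=+example-option
  "EnableExampleOption", // AMDGPUSubtarget member the feature initializes
  "true",                // value stored in that member when the feature is on
  "Hypothetical option-style feature illustrating the operand order"
>;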
def FeatureDisable : SubtargetFeature<"", - "FeatureDisable","true", - "Dummy feature to disable assembler" - " instructions">; + "FeatureDisable","true", + "Dummy feature to disable assembler instructions" +>; class SubtargetFeatureGeneration Implies> : @@ -233,33 +265,39 @@ def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>; def FeatureR600 : SubtargetFeatureGeneration<"R600", - [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]>; + [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0] +>; def FeatureR700 : SubtargetFeatureGeneration<"R700", - [FeatureFetchLimit16, FeatureLocalMemorySize0]>; + [FeatureFetchLimit16, FeatureLocalMemorySize0] +>; def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN", - [FeatureFetchLimit16, FeatureLocalMemorySize32768]>; + [FeatureFetchLimit16, FeatureLocalMemorySize32768] +>; def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS", - [FeatureFetchLimit16, FeatureWavefrontSize64, - FeatureLocalMemorySize32768] + [FeatureFetchLimit16, FeatureWavefrontSize64, + FeatureLocalMemorySize32768] >; def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS", - [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768, - FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding, - FeatureLDSBankCount32]>; + [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768, + FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding, + FeatureLDSBankCount32] +>; def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS", - [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536, - FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace, - FeatureGCN1Encoding, FeatureCIInsts]>; + [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536, + FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace, + FeatureGCN1Encoding, FeatureCIInsts] +>; def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", - [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536, - FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, - FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32]>; + [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536, + FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, + FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32] +>; //===----------------------------------------------------------------------===// @@ -289,6 +327,7 @@ //===----------------------------------------------------------------------===// def TruePredicate : Predicate<"true">; + def isSICI : Predicate< "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS" @@ -298,6 +337,13 @@ "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">, AssemblerPredicate<"FeatureGCN3Encoding">; +def isCIVI : Predicate < + "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || " + "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS" +>, AssemblerPredicate<"FeatureCIInsts">; + +def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">; + class PredicateControl { Predicate SubtargetPredicate; Predicate SIAssemblerPredicate = isSICI; Index: lib/Target/AMDGPU/AMDGPUInstructions.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructions.td +++ lib/Target/AMDGPU/AMDGPUInstructions.td @@ -587,13 +587,6 @@ [{ (void)N; return TM.Options.NoNaNsFPMath; }] >; -/* -class UMUL24Pattern : Pat < - (mul U24:$x, 
U24:$y), - (UMUL24 $x, $y) ->; -*/ - class IMad24Pat : Pat < (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2), (Inst $src0, $src1, $src2) Index: lib/Target/AMDGPU/CIInstructions.td =================================================================== --- lib/Target/AMDGPU/CIInstructions.td +++ lib/Target/AMDGPU/CIInstructions.td @@ -25,14 +25,6 @@ // BUFFER_LOAD_DWORDX3 // BUFFER_STORE_DWORDX3 - -def isCIVI : Predicate < - "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || " - "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS" ->, AssemblerPredicate<"FeatureCIInsts">; - -def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">; - //===----------------------------------------------------------------------===// // VOP1 Instructions //===----------------------------------------------------------------------===// @@ -262,7 +254,7 @@ flat<0x60>, "flat_atomic_fmax_x2", VReg_64 >; -} // End let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst +} // End SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst let Predicates = [isCI] in { @@ -289,7 +281,7 @@ let Predicates = [isCIVI] in { -// Patterns for global loads with no offset +// Patterns for global loads with no offset. class FlatLoadPat : Pat < (vt (node i64:$addr)), (inst $addr, 0, 0, 0) Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -101,7 +101,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in { defm S_MOV_B32 : SOP1_32 , "s_mov_b32", []>; defm S_MOV_B64 : SOP1_64 , "s_mov_b64", []>; - } // let isRematerializeable = 1 + } // End isRematerializeable = 1 let Uses = [SCC] in { defm S_CMOV_B32 : SOP1_32 , "s_cmov_b32", []>; @@ -1234,7 +1234,7 @@ VOP_F64_I32, uint_to_fp >; -} // let SchedRW = [WriteQuarterRate32] +} // End SchedRW = [WriteQuarterRate32] defm V_FRACT_F32 : VOP1Inst , "v_fract_f32", VOP_F32_F32, AMDGPUfract @@ -1270,7 +1270,7 @@ VOP_F32_F32, AMDGPUrsq >; -} //let SchedRW = [WriteQuarterRate32] +} // End SchedRW = [WriteQuarterRate32] let SchedRW = [WriteDouble] in { @@ -1281,7 +1281,7 @@ VOP_F64_F64, AMDGPUrsq >; -} // let SchedRW = [WriteDouble]; +} // End SchedRW = [WriteDouble]; defm V_SQRT_F32 : VOP1Inst , "v_sqrt_f32", VOP_F32_F32, fsqrt @@ -1710,7 +1710,7 @@ defm V_SAD_U32 : VOP3Inst , "v_sad_u32", VOP_I32_I32_I32_I32 >; -////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>; +//def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>; defm V_DIV_FIXUP_F32 : VOP3Inst < vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup >; @@ -1740,13 +1740,13 @@ VOP_F64_F64_F64, fmaxnum >; -} // isCommutable = 1 +} // End isCommutable = 1 defm V_LDEXP_F64 : VOP3Inst , "v_ldexp_f64", VOP_F64_F64_I32, AMDGPUldexp >; -} // let SchedRW = [WriteDoubleAdd] +} // End let SchedRW = [WriteDoubleAdd] let isCommutable = 1, SchedRW = [WriteQuarterRate32] in { @@ -1764,7 +1764,7 @@ VOP_I32_I32_I32, mulhs >; -} // isCommutable = 1, SchedRW = [WriteQuarterRate32] +} // End isCommutable = 1, SchedRW = [WriteQuarterRate32] let SchedRW = [WriteFloatFMA, WriteSALU] in { defm V_DIV_SCALE_F32 : VOP3bInst , "v_div_scale_f32", @@ -1777,7 +1777,7 @@ defm V_DIV_SCALE_F64 : VOP3bInst , "v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64 >; -} // let SchedRW = [WriteDouble] +} // End SchedRW = [WriteDouble] let isCommutable = 1, Uses = [VCC, EXEC] in { @@ -1814,7 +1814,7 @@ vop3<0x174, 0x292>, 
"v_trig_preop_f64", VOP_F64_F64_I32, AMDGPUtrig_preop >; -} // let SchedRW = [WriteDouble] +} // End SchedRW = [WriteDouble] // These instructions only exist on SI and CI let SubtargetPredicate = isSICI in { @@ -1856,7 +1856,7 @@ // 64-bit vector move instruction. This is mainly used by the SIFoldOperands // pass to enable folding of inline immediates. def V_MOV_B64_PSEUDO : InstSI <(outs VReg_64:$dst), (ins VSrc_64:$src0), "", []>; -} // end let hasSideEffects = 0, mayLoad = 0, mayStore = 0 +} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0 let hasSideEffects = 1, SALU = 1 in { def SGPR_USE : InstSI <(outs),(ins), "", []>; @@ -1893,7 +1893,7 @@ [(int_amdgcn_loop i64:$saved, bb:$target)] >; -} // end isBranch = 1, isTerminator = 1 +} // End isBranch = 1, isTerminator = 1 def SI_BREAK : InstSI < (outs SReg_64:$dst), @@ -1934,7 +1934,7 @@ >; } // End Uses = [EXEC], Defs = [EXEC,VCC] -} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1 +} // End mayLoad = 1, mayStore = 1, hasSideEffects = 1 let Uses = [EXEC], Defs = [EXEC,VCC,M0] in { @@ -1967,7 +1967,7 @@ def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST; def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST; -} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0] +} // End Uses = [EXEC], Defs = [EXEC,VCC,M0] multiclass SI_SPILL_SGPR { @@ -1975,8 +1975,7 @@ def _SAVE : InstSI < (outs), (ins sgpr_class:$src, i32imm:$frame_idx), - "", [] - > { + "", []> { let mayStore = 1; let mayLoad = 0; } @@ -1984,8 +1983,7 @@ def _RESTORE : InstSI < (outs sgpr_class:$dst), (ins i32imm:$frame_idx), - "", [] - > { + "", []> { let mayStore = 0; let mayLoad = 1; } @@ -2007,8 +2005,7 @@ (outs), (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), - "", [] - > { + "", []> { let mayStore = 1; let mayLoad = 0; } @@ -2016,8 +2013,7 @@ def _RESTORE : InstSI < (outs vgpr_class:$dst), (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), - "", [] - > { + "", []> { let mayStore = 0; let mayLoad = 1; } @@ -2043,9 +2039,9 @@ } // End Defs = [SCC] -} // end IsCodeGenOnly, isPseudo +} // End isCodeGenOnly, isPseudo -} // end SubtargetPredicate = isGCN +} // End SubtargetPredicate = isGCN let Predicates = [isGCN] in { @@ -2060,7 +2056,6 @@ (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0) >; -/* int_SI_export */ def : Pat < (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr, f32:$src0, f32:$src1, f32:$src2, f32:$src3), @@ -2445,11 +2440,6 @@ /********** Extraction, Insertion, Building and Casting **********/ /********** ============================================ **********/ -//def : Extract_Element; -//def : Extract_Element; -//def : Extract_Element; -//def : Extract_Element; - foreach Index = 0-2 in { def Extract_Element_v2i32_#Index : Extract_Element < i32, v2i32, Index, !cast(sub#Index) @@ -2514,46 +2504,45 @@ >; } -def : BitConvert ; +// FIXME: Why do only some of these type combinations for SReg and +// VReg? 
+// 32-bit bitcast def : BitConvert ; - -def : BitConvert ; def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +// 64-bit bitcast def : BitConvert ; - def : BitConvert ; - -def : BitConvert ; def : BitConvert ; -def : BitConvert ; +def : BitConvert ; def : BitConvert ; -def : BitConvert ; +def : BitConvert ; def : BitConvert ; -def : BitConvert ; -def : BitConvert ; +def : BitConvert ; def : BitConvert ; +def : BitConvert ; def : BitConvert ; -def : BitConvert ; +def : BitConvert ; def : BitConvert ; +def : BitConvert ; - +// 128-bit bitcast def : BitConvert ; def : BitConvert ; - def : BitConvert ; def : BitConvert ; def : BitConvert ; def : BitConvert ; - - - -def : BitConvert ; +// 256-bit bitcast def : BitConvert ; +def : BitConvert ; def : BitConvert ; def : BitConvert ; +// 512-bit bitcast def : BitConvert ; def : BitConvert ; @@ -2575,7 +2564,7 @@ def : Pat < (fneg (fabs f32:$src)), - (S_OR_B32 $src, 0x80000000) /* Set sign bit */ + (S_OR_B32 $src, 0x80000000) // Set sign bit >; // FIXME: Should use S_OR_B32 @@ -2665,7 +2654,6 @@ /********** Intrinsic Patterns **********/ /********** ================== **********/ -/* llvm.AMDGPU.pow */ def : POW_Common ; def : Pat < @@ -2702,7 +2690,7 @@ def : Ext32Pat ; def : Ext32Pat ; -// Offset in an 32Bit VGPR +// Offset in an 32-bit VGPR def : Pat < (SIload_constant v4i32:$sbase, i32:$voff), (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0) @@ -2934,22 +2922,6 @@ def : MUBUFScratchStorePat ; def : MUBUFScratchStorePat ; -/* -class MUBUFStore_Pattern : Pat < - (st vt:$value, (MUBUFScratch v4i32:$srsrc, i64:$vaddr, u16imm:$offset)), - (Instr $value, $srsrc, $vaddr, $offset) ->; - -let Predicates = [isSICI] in { -def : MUBUFStore_Pattern ; -def : MUBUFStore_Pattern ; -def : MUBUFStore_Pattern ; -def : MUBUFStore_Pattern ; -def : MUBUFStore_Pattern ; -} // End Predicates = [isSICI] - -*/ - //===----------------------------------------------------------------------===// // MTBUF Patterns //===----------------------------------------------------------------------===//
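Stepping back from the individual hunks (an illustrative sketch, not part of the patch): the buffer and flat selection patterns touched in this file and in CIInstructions.td sit under subtarget predicates such as isGCN, including the isCIVI and HasFlatAddressSpace predicates this patch moves into AMDGPU.td. The idiom looks roughly like the following; flat_load_frag is a placeholder for whatever load fragment the real pattern is instantiated with, and the trailing zero operands mirror the FlatLoadPat class shown above.

let Predicates = [isCIVI] in {

// Select a flat load for a 32-bit load from a 64-bit address.
// FLAT_LOAD_DWORD is the CI+ flat load from CIInstructions.td; the
// zeros fill its glc/slc/tfe operands, as in FlatLoadPat.
def : Pat <
  (i32 (flat_load_frag i64:$addr)),
  (FLAT_LOAD_DWORD $addr, 0, 0, 0)
>;

} // End Predicates = [isCIVI]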