diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1546,6 +1546,10 @@ Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">, AssemblerPredicate<(all_of FeatureGFX10Insts)>; +def HasGFX10Insts : + Predicate<"Subtarget->hasGFX10Insts()">, + AssemblerPredicate<(all_of FeatureGFX10Insts)>; + def isGFX10Before1030 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 &&" "!Subtarget->hasGFX10_3Insts()">, diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -554,7 +554,7 @@ .scalarize(0) .widenScalarToNextPow2(0, 32) .lower(); - } else if (ST.has16BitInsts()) { + } else if (ST.has16BitInsts() && ST.hasMad64_32()) { getActionDefinitionsBuilder({G_ADD, G_SUB}) .legalFor({S32, S16}) .minScalar(0, S16) @@ -563,12 +563,11 @@ .scalarize(0); getActionDefinitionsBuilder(G_MUL) - .legalFor({S32, S16}) - .scalarize(0) - .minScalar(0, S16) - .widenScalarToNextMultipleOf(0, 32) - .custom(); - assert(ST.hasMad64_32()); + .legalFor({S32, S16}) + .scalarize(0) + .minScalar(0, S16) + .widenScalarToNextMultipleOf(0, 32) + .custom(); // Technically the saturating operations require clamp bit support, but this // was introduced at the same time as 16-bit operations. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -182,6 +182,11 @@ } unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const { + if (hasGFX10Insts() && (Opcode == AMDGPU::V_PERMLANE16_B32_e64 || + Opcode == AMDGPU::V_PERMLANEX16_B32_e64)) { + return 2; + } + if (getGeneration() < GFX10) return 1; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -849,11 +849,13 @@ bool hasLDSFPAtomicAdd() const { return GFX8Insts; } /// \returns true if the subtarget has the v_permlanex16_b32 instruction. - bool hasPermLaneX16() const { return getGeneration() >= GFX10; } + bool hasPermLaneX16() const { return hasGFX10Insts(); } /// \returns true if the subtarget has the v_permlane64_b32 instruction. bool hasPermLane64() const { return getGeneration() >= GFX11; } + bool hasGFX10Insts() const { return GFX10Insts; } + bool hasGFX11Insts() const { return GFX11Insts; } bool hasDPP() const { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -7998,6 +7998,12 @@ MCOp = NMCOp; } + if (ST.hasGFX10Insts()) { + uint16_t NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX10); + if (NMCOp != (uint16_t)-1) + MCOp = NMCOp; + } + if (ST.hasGFX11Insts()) { uint16_t NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX11); if (NMCOp != (uint16_t)-1) diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -706,7 +706,7 @@ >; -let SubtargetPredicate = isGFX10Plus in { +let Predicates = [HasGFX10Insts, Has16BitInsts] in { let isCommutable = 1, isReMaterializable = 1 in { defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile>; } // End isCommutable = 1, isReMaterializable = 1 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll @@ -1,3 +1,4 @@ +; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mattr=+gfx10-insts,+16-bit-insts -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s ; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s ; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s