Add the _e64 opcode variants next to each existing _e32 case label in the OrigMIOp switch of GCNDPPCombine.cpp, so both encodings share the same checks on the OldOpnd immediate (0, unsigned 32-bit max, signed 32-bit max, signed 32-bit min, 1).
Switch the dpp_combine.mir RUN line from -mcpu=tonga to -mcpu=gfx900 and add the add_u32_e64 test.
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -253,33 +253,46 @@ switch (OrigMIOp) { default: break; case AMDGPU::V_ADD_U32_e32: + case AMDGPU::V_ADD_U32_e64: case AMDGPU::V_ADD_I32_e32: + case AMDGPU::V_ADD_I32_e64: case AMDGPU::V_OR_B32_e32: + case AMDGPU::V_OR_B32_e64: case AMDGPU::V_SUBREV_U32_e32: + case AMDGPU::V_SUBREV_U32_e64: case AMDGPU::V_SUBREV_I32_e32: + case AMDGPU::V_SUBREV_I32_e64: case AMDGPU::V_MAX_U32_e32: + case AMDGPU::V_MAX_U32_e64: case AMDGPU::V_XOR_B32_e32: + case AMDGPU::V_XOR_B32_e64: if (OldOpnd->getImm() == 0) return true; break; case AMDGPU::V_AND_B32_e32: + case AMDGPU::V_AND_B32_e64: case AMDGPU::V_MIN_U32_e32: + case AMDGPU::V_MIN_U32_e64: if (static_cast<uint32_t>(OldOpnd->getImm()) == std::numeric_limits<uint32_t>::max()) return true; break; case AMDGPU::V_MIN_I32_e32: + case AMDGPU::V_MIN_I32_e64: if (static_cast<int32_t>(OldOpnd->getImm()) == std::numeric_limits<int32_t>::max()) return true; break; case AMDGPU::V_MAX_I32_e32: + case AMDGPU::V_MAX_I32_e64: if (static_cast<int32_t>(OldOpnd->getImm()) == std::numeric_limits<int32_t>::min()) return true; break; case AMDGPU::V_MUL_I32_I24_e32: + case AMDGPU::V_MUL_I32_I24_e64: case AMDGPU::V_MUL_U32_U24_e32: + case AMDGPU::V_MUL_U32_U24_e64: if (OldOpnd->getImm() == 1) return true; break; diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir --- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir +++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir @@ -1,4 +1,4 @@ -# RUN: llc -march=amdgcn -mcpu=tonga -run-pass=gcn-dpp-combine -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=gcn-dpp-combine -o - %s | FileCheck %s --- # old is undefined: only combine when masks are fully enabled and @@ -328,6 +328,30 @@ %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $exec ... 
+# check e64 forms: combine only when every modifier (here, clamp) is at its default +# CHECK-LABEL: name: add_u32_e64 +# CHECK: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec +# CHECK: %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec + +name: add_u32_e64 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = COPY $vgpr1 + %2:vgpr_32 = IMPLICIT_DEF + + ; this should be combined as all modifiers are default (last immediate, clamp, is 0) + %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec + %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec + + ; this shouldn't be combined as clamp is set (last immediate is 1) + %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec + %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec +... + # tests on sequences of dpp consumers # CHECK-LABEL: name: dpp_seq # CHECK: %4:vgpr_32 = V_ADD_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec