Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1805,6 +1805,10 @@
   if (OptLevel == CodeGenOpt::None)
     return SDValue();
 
+  // Don't simplify the token factor if the node itself has too many operands.
+  if (N->getNumOperands() > TokenFactorInlineLimit)
+    return SDValue();
+
   // If the sole user is a token factor, we should make sure we have a
   // chance to merge them together. This prevents TF chains from inhibiting
   // optimizations.
Index: llvm/test/CodeGen/AMDGPU/token-factor-inline-limit-test.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/token-factor-inline-limit-test.ll
@@ -0,0 +1,63 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-TFILD %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -combiner-tokenfactor-inline-limit=7 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-TFIL7 %s
+
+
+; GCN-LABEL: {{^}}token_factor_inline_limit_test:
+
+; GCN-TFILD: v_mov_b32_e32 [[REG8:v[0-9]+]], 8
+; GCN-TFILD: v_mov_b32_e32 [[REG9:v[0-9]+]], 9
+; GCN-TFILD: v_mov_b32_e32 [[REG10:v[0-9]+]], 10
+; GCN-TFILD: v_mov_b32_e32 [[REG11:v[0-9]+]], 11
+; GCN-TFILD: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
+; GCN-TFILD: buffer_store_dword [[REG8]], {{.*$}}
+; GCN-TFILD: buffer_store_dword [[REG9]], {{.*}} offset:4
+; GCN-TFILD: buffer_store_dword [[REG10]], {{.*}} offset:8
+; GCN-TFILD: buffer_store_dword [[REG11]], {{.*}} offset:12
+; GCN-TFILD: buffer_store_dword [[REG12]], {{.*}} offset:16
+; GCN-TFILD: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
+; GCN-TFILD: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
+; GCN-TFILD: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
+; GCN-TFILD: buffer_store_dword [[REG13]], {{.*}} offset:20
+; GCN-TFILD: buffer_store_dword [[REG14]], {{.*}} offset:24
+; GCN-TFILD: buffer_store_dword [[REG15]], {{.*}} offset:28
+
+; GCN-TFIL7: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
+; GCN-TFIL7: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
+; GCN-TFIL7: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
+; GCN-TFIL7: buffer_store_dword [[REG15]], {{.*}} offset:28
+; GCN-TFIL7: buffer_store_dword [[REG14]], {{.*}} offset:24
+; GCN-TFIL7: buffer_store_dword [[REG13]], {{.*}} offset:20
+; GCN-TFIL7: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
+; GCN-TFIL7: v_mov_b32_e32 [[REG11:v[0-9]+]], 11
+; GCN-TFIL7: v_mov_b32_e32 [[REG10:v[0-9]+]], 10
+; GCN-TFIL7: v_mov_b32_e32 [[REG9:v[0-9]+]], 9
+; GCN-TFIL7: v_mov_b32_e32 [[REG8:v[0-9]+]], 8
+; GCN-TFIL7: buffer_store_dword [[REG12]], {{.*}} offset:16
+; GCN-TFIL7: buffer_store_dword [[REG11]], {{.*}} offset:12
+; GCN-TFIL7: buffer_store_dword [[REG10]], {{.*}} offset:8
+; GCN-TFIL7: buffer_store_dword [[REG9]], {{.*}} offset:4
+; GCN-TFIL7: buffer_store_dword [[REG8]], {{.*$}}
+
+; GCN: v_mov_b32_e32 v31, 7
+; GCN: s_getpc
+; The call below passes 8 x <5 x i32> = 40 i32 values. The check lines above
+; expect the constant 7 in v31 and the constants 8..15 in buffer stores at
+; offsets 0..28, which fixes the last nine vector elements.
+; NOTE(review): the remaining constant operands are not pinned by any check
+; line; the values used here are a reconstruction - confirm before landing.
+define void @token_factor_inline_limit_test() {
+entry:
+  call void @external_void_func_8xv5i32(
+    <5 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0>,
+    <5 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1>,
+    <5 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2>,
+    <5 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3>,
+    <5 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4>,
+    <5 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5>,
+    <5 x i32> <i32 6, i32 7, i32 8, i32 9, i32 10>,
+    <5 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15>)
+  ret void
+}
+
+declare hidden void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>,
+                                                <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>)