diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -910,12 +910,6 @@ } const unsigned InstSubclass = getInstSubclass(Opc, *TII); - // Do not merge VMEM buffer instructions with "swizzled" bit set. - int Swizzled = - AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::swz); - if (Swizzled != -1 && CI.I->getOperand(Swizzled).getImm()) - return false; - DenseSet RegDefsToMove; DenseSet PhysRegUsesToMove; addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove); @@ -971,11 +965,6 @@ continue; } - int Swizzled = - AMDGPU::getNamedOperandIdx(MBBI->getOpcode(), AMDGPU::OpName::swz); - if (Swizzled != -1 && MBBI->getOperand(Swizzled).getImm()) - return false; - // Handle a case like // DS_WRITE_B32 addr, v, idx0 // w = DS_READ_B32 addr, idx0 @@ -2014,6 +2003,12 @@ if (InstClass == UNKNOWN) continue; + // Do not merge VMEM buffer instructions with "swizzled" bit set. + int Swizzled = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz); + if (Swizzled != -1 && MI.getOperand(Swizzled).getImm()) + continue; + CombineInfo CI; CI.setMI(MI, *TII, *STM); CI.Order = Order++; diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir --- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir @@ -780,9 +780,8 @@ # GFX9-LABEL: name: gfx9_tbuffer_load_merge_across_swizzle -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 12, 116, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) name: gfx9_tbuffer_load_merge_across_swizzle body: | bb.0.entry: @@ -1598,9 +1597,8 @@ # GFX10-LABEL: name: gfx10_tbuffer_load_merge_across_swizzle -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 12, 22, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) name: gfx10_tbuffer_load_merge_across_swizzle body: | bb.0.entry: