Index: llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -832,9 +832,9 @@ } } -// Convert the V_ADDC_U32_e64 into V_ADDC_U32_e32, and -// V_ADD_CO_U32_e64 into V_ADD_CO_U32_e32. This allows isConvertibleToSDWA -// to perform its transformation on V_ADD_CO_U32_e32 into V_ADD_CO_U32_sdwa. +// Convert the V_ADD_CO_U32_e64 into V_ADD_CO_U32_e32. This allows +// isConvertibleToSDWA to perform its transformation on V_ADD_CO_U32_e32 into +// V_ADD_CO_U32_sdwa. // // We are transforming from a VOP3 into a VOP2 form of the instruction. // %19:vgpr_32 = V_AND_B32_e32 255, @@ -848,8 +848,8 @@ // %47:vgpr_32 = V_ADD_CO_U32_sdwa // 0, %26.sub0:vreg_64, 0, killed %16:vgpr_32, 0, 6, 0, 6, 0, // implicit-def $vcc, implicit $exec -// %48:vgpr_32 = V_ADDC_U32_e32 -// 0, %26.sub1:vreg_64, implicit-def $vcc, implicit $vcc, implicit $exec +// %48:vgpr_32, dead %50:sreg_64_xexec = V_ADDC_U32_e64 +// %26.sub1:vreg_64, %54:vgpr_32, killed $vcc, implicit $exec void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI, const GCNSubtarget &ST) const { int Opc = MI.getOpcode(); @@ -868,10 +868,7 @@ if (!NextOp) return; MachineInstr &MISucc = *NextOp->getParent(); - // Can the successor be shrunk? - if (!TII->canShrink(MISucc, *MRI)) - return; - int SuccOpc = AMDGPU::getVOPe32(MISucc.getOpcode()); + // Make sure the carry in/out are subsequently unused. MachineOperand *CarryIn = TII->getNamedOperand(MISucc, AMDGPU::OpName::src2); if (!CarryIn) @@ -893,7 +890,6 @@ return; } - // Make the two new e32 instruction variants. 
// Replace MI with V_{SUB|ADD}_I32_e32 BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opc)) .add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) @@ -903,14 +899,9 @@ MI.eraseFromParent(); - // Replace MISucc with V_{SUBB|ADDC}_U32_e32 - BuildMI(MBB, MISucc, MISucc.getDebugLoc(), TII->get(SuccOpc)) - .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::vdst)) - .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src0)) - .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src1)) - .setMIFlags(MISucc.getFlags()); + // Since the carry output of MI is now VCC, update its use in MISucc - MISucc.eraseFromParent(); + MISucc.substituteRegister(CarryIn->getReg(), AMDGPU::VCC, 0, *TRI); } bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI, Index: llvm/test/CodeGen/AMDGPU/sdwa-ops.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/sdwa-ops.mir +++ llvm/test/CodeGen/AMDGPU/sdwa-ops.mir @@ -4,11 +4,11 @@ # test for 3 consecutive _sdwa's # GFX9-LABEL: name: test1_add_co_sdwa # GFX9: = nsw V_ADD_CO_U32_sdwa -# GFX9-NEXT: = nuw V_ADDC_U32_e32 +# GFX9-NEXT: = nuw V_ADDC_U32_e64 # GFX9: V_ADD_CO_U32_sdwa -# GFX9-NEXT: V_ADDC_U32_e32 +# GFX9-NEXT: V_ADDC_U32_e64 # GFX9: V_ADD_CO_U32_sdwa -# GFX9-NEXT: V_ADDC_U32_e32 +# GFX9-NEXT: V_ADDC_U32_e64 --- name: test1_add_co_sdwa tracksRegLiveness: true @@ -48,7 +48,7 @@ # test for VCC interference on sdwa, should generate 1 xform only # GFX9-LABEL: name: test2_add_co_sdwa # GFX9: V_ADD_CO_U32_sdwa -# GFX9: V_ADDC_U32_e32 +# GFX9: V_ADDC_U32_e64 # GFX9-NOT: V_ADD_CO_U32_sdwa # GFX9-NOT: V_ADDC_U32_e32 --- @@ -151,7 +151,7 @@ # test for simple example, should generate sdwa # GFX9-LABEL: name: test5_add_co_sdwa # GFX9: V_ADD_CO_U32_sdwa -# GFX9: V_ADDC_U32_e32 +# GFX9: V_ADDC_U32_e64 --- name: test5_add_co_sdwa tracksRegLiveness: true @@ -388,4 +388,3 @@ %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, 
%subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) - Index: llvm/test/CodeGen/AMDGPU/v_add_u64_pseudo_sdwa.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/v_add_u64_pseudo_sdwa.ll +++ llvm/test/CodeGen/AMDGPU/v_add_u64_pseudo_sdwa.ll @@ -5,8 +5,7 @@ ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: v_add_u32_e32 v1, 10, v0 ; GFX9-NEXT: v_add_u32_e32 v0, 20, v0 -; GFX9-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v1, v0 +; GFX9-NEXT: v_add_co_u32_sdwa v0, vcc, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: v_addc_co_u32_e64 v1, s[0:1], 0, 0, vcc ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: s_endpgm @@ -27,16 +26,13 @@ ; GFX9-LABEL: test_add_co_sdwa: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 3, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dword v2, v1, s[2:3] -; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: global_load_dword v4, v2, s[2:3] ; GFX9-NEXT: global_load_dwordx2 v[0:1], v3, s[0:1] -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_add_co_u32_sdwa v0, vcc, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm Index: llvm/test/CodeGen/AMDGPU/v_sub_u64_pseudo_sdwa.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/v_sub_u64_pseudo_sdwa.ll +++ llvm/test/CodeGen/AMDGPU/v_sub_u64_pseudo_sdwa.ll @@ -5,8 +5,7 @@ ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: v_add_u32_e32 v1, 10, v0 ; GFX9-NEXT: v_add_u32_e32 v0, 20, v0 -; 
GFX9-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v1, v0 +; GFX9-NEXT: v_sub_co_u32_sdwa v0, vcc, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: v_subb_co_u32_e64 v1, s[0:1], 0, 0, vcc ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: s_endpgm @@ -27,16 +26,13 @@ ; GFX9-LABEL: test_sub_co_sdwa: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 3, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dword v2, v1, s[2:3] -; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: global_load_dword v4, v2, s[2:3] ; GFX9-NEXT: global_load_dwordx2 v[0:1], v3, s[0:1] -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_sub_co_u32_sdwa v0, vcc, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm Index: llvm/tmp.mir =================================================================== --- /dev/null +++ llvm/tmp.mir @@ -0,0 +1,1465 @@ +--- | + ; ModuleID = './test/CodeGen/AMDGPU/sdwa-ops.mir' + source_filename = "./test/CodeGen/AMDGPU/sdwa-ops.mir" + target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" + target triple = "amdgcn-unknown-unknown" + + define void @test1_add_co_sdwa() #0 { + entry: + unreachable + } + + define void @test2_add_co_sdwa() #0 { + entry: + unreachable + } + + define void @test3_add_co_sdwa() #0 { + entry: + unreachable + } + + define void @test4_add_co_sdwa() #0 { + entry: + unreachable + } + + define void @test5_add_co_sdwa() #0 { + 
entry: + unreachable + } + + define void @test6_add_co_sdwa() #0 { + entry: + unreachable + } + + define void @test7_add_co_sdwa() #0 { + entry: + unreachable + } + + define void @test8_add_co_sdwa() #0 { + entry: + unreachable + } + + define void @test9_add_co_sdwa() #0 { + entry: + unreachable + } + + define void @test10_add_co_sdwa() #0 { + entry: + unreachable + } + + define void @test11_add_co_sdwa() #0 { + entry: + unreachable + } + + define void @test12_add_co_sdwa() #0 { + entry: + unreachable + } + + attributes #0 = { "target-cpu"="gfx900" } + +... +--- +name: test1_add_co_sdwa +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: vgpr_32, preferred-register: '' } + - { id: 1, class: sgpr_64, preferred-register: '' } + - { id: 2, class: sreg_32_xm0, preferred-register: '' } + - { id: 3, class: vgpr_32, preferred-register: '' } + - { id: 4, class: vreg_64, preferred-register: '' } + - { id: 5, class: vgpr_32, preferred-register: '' } + - { id: 6, class: sreg_64_xexec, preferred-register: '' } + - { id: 7, class: vgpr_32, preferred-register: '' } + - { id: 8, class: sreg_64_xexec, preferred-register: '' } + - { id: 9, class: vreg_64, preferred-register: '' } + - { id: 10, class: vgpr_32, preferred-register: '' } + - { id: 11, class: vgpr_32, preferred-register: '' } + - { id: 12, class: sreg_64_xexec, preferred-register: '' } + - { id: 13, class: vgpr_32, preferred-register: '' } + - { id: 14, class: sreg_64_xexec, preferred-register: '' } + - { id: 15, class: vreg_64, preferred-register: '' } + - { id: 16, class: vgpr_32, preferred-register: '' } + - { id: 17, class: vgpr_32, preferred-register: '' } + - { id: 18, class: sreg_64_xexec, preferred-register: 
'' } + - { id: 19, class: vgpr_32, preferred-register: '' } + - { id: 20, class: sreg_64_xexec, preferred-register: '' } + - { id: 21, class: vreg_64, preferred-register: '' } +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } + - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + gdsSize: 0 + dynLDSAlign: 1 + isEntryFunction: false + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + bytesInStackArgArea: 0 + returnsVoid: true + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + queuePtr: { reg: '$sgpr6_sgpr7' } + dispatchID: { reg: '$sgpr10_sgpr11' } + workGroupIDX: { reg: '$sgpr12' } + workGroupIDY: { reg: '$sgpr13' } + workGroupIDZ: { reg: '$sgpr14' } + LDSKernelId: { reg: '$sgpr15' } + implicitArgPtr: { reg: '$sgpr8_sgpr9' } + workItemIDX: { reg: '$vgpr31', mask: 1023 } + workItemIDY: { reg: '$vgpr31', mask: 1047552 } + workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } + mode: + ieee: true + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 + vgprForAGPRCopy: '' +body: | 
+ bb.0: + liveins: $vgpr0, $sgpr0_sgpr1 + + %1:sgpr_64 = COPY $sgpr0_sgpr1 + %0:vgpr_32 = COPY $vgpr0 + %2:sreg_32_xm0 = S_MOV_B32 255 + %3:vgpr_32 = V_AND_B32_e32 %2, %0, implicit $exec + %4:vreg_64 = COPY $sgpr0_sgpr1 + %5:vgpr_32 = nsw V_ADD_CO_U32_sdwa 0, %4.sub0, 0, %0, 0, 6, 0, 6, 0, implicit-def $vcc, implicit $exec + %7:vgpr_32, dead %8:sreg_64_xexec = nuw V_ADDC_U32_e64 %4.sub1, %0, killed $vcc, 0, implicit $exec + %9:vreg_64 = REG_SEQUENCE %5, %subreg.sub0, %7, %subreg.sub1 + GLOBAL_STORE_DWORDX2_SADDR %4.sub0, %9, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) + %10:vgpr_32 = V_AND_B32_e32 %2, %0, implicit $exec + %11:vgpr_32 = V_ADD_CO_U32_sdwa 0, %4.sub0, 0, %0, 0, 6, 0, 6, 0, implicit-def $vcc, implicit $exec + %13:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 %4.sub1, %0, killed $vcc, 0, implicit $exec + %15:vreg_64 = REG_SEQUENCE %11, %subreg.sub0, %13, %subreg.sub1 + GLOBAL_STORE_DWORDX2_SADDR %4.sub0, %15, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) + %16:vgpr_32 = V_AND_B32_e32 %2, %0, implicit $exec + %17:vgpr_32 = V_ADD_CO_U32_sdwa 0, %4.sub0, 0, %0, 0, 6, 0, 6, 0, implicit-def $vcc, implicit $exec + %19:vgpr_32, dead %20:sreg_64_xexec = V_ADDC_U32_e64 %4.sub1, %0, killed $vcc, 0, implicit $exec + %21:vreg_64 = REG_SEQUENCE %17, %subreg.sub0, %19, %subreg.sub1 + GLOBAL_STORE_DWORDX2_SADDR %4.sub0, %21, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) + +... 
+--- +name: test2_add_co_sdwa +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: vgpr_32, preferred-register: '' } + - { id: 1, class: sgpr_64, preferred-register: '' } + - { id: 2, class: sreg_32_xm0, preferred-register: '' } + - { id: 3, class: vgpr_32, preferred-register: '' } + - { id: 4, class: vreg_64, preferred-register: '' } + - { id: 5, class: vgpr_32, preferred-register: '' } + - { id: 6, class: sreg_64_xexec, preferred-register: '' } + - { id: 7, class: vgpr_32, preferred-register: '' } + - { id: 8, class: vgpr_32, preferred-register: '' } + - { id: 9, class: sreg_64_xexec, preferred-register: '' } + - { id: 10, class: vgpr_32, preferred-register: '' } + - { id: 11, class: sreg_64_xexec, preferred-register: '' } + - { id: 12, class: vreg_64, preferred-register: '' } + - { id: 13, class: vgpr_32, preferred-register: '' } + - { id: 14, class: sreg_64_xexec, preferred-register: '' } + - { id: 15, class: vreg_64, preferred-register: '' } +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } + - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 
+ ldsSize: 0 + gdsSize: 0 + dynLDSAlign: 1 + isEntryFunction: false + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + bytesInStackArgArea: 0 + returnsVoid: true + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + queuePtr: { reg: '$sgpr6_sgpr7' } + dispatchID: { reg: '$sgpr10_sgpr11' } + workGroupIDX: { reg: '$sgpr12' } + workGroupIDY: { reg: '$sgpr13' } + workGroupIDZ: { reg: '$sgpr14' } + LDSKernelId: { reg: '$sgpr15' } + implicitArgPtr: { reg: '$sgpr8_sgpr9' } + workItemIDX: { reg: '$vgpr31', mask: 1023 } + workItemIDY: { reg: '$vgpr31', mask: 1047552 } + workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } + mode: + ieee: true + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 + vgprForAGPRCopy: '' +body: | + bb.0: + liveins: $vgpr0, $sgpr0_sgpr1 + + %1:sgpr_64 = COPY $sgpr0_sgpr1 + %0:vgpr_32 = COPY $vgpr0 + %2:sreg_32_xm0 = S_MOV_B32 255 + %3:vgpr_32 = V_AND_B32_e32 %2, %0, implicit $exec + %4:vreg_64 = COPY $sgpr0_sgpr1 + %5:vgpr_32 = V_ADD_CO_U32_sdwa 0, %4.sub0, 0, %0, 0, 6, 0, 6, 0, implicit-def $vcc, implicit $exec + %7:vgpr_32 = V_AND_B32_e32 %2, %0, implicit $exec + %8:vgpr_32, %9:sreg_64_xexec = V_ADD_CO_U32_e64 %4.sub0, %7, 0, implicit $exec + %10:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 %4.sub1, %0, killed %9, 0, implicit $exec + %12:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %10, %subreg.sub1 + %13:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 %4.sub1, %0, killed $vcc, 0, implicit $exec + %15:vreg_64 = REG_SEQUENCE %5, %subreg.sub0, %13, %subreg.sub1 + GLOBAL_STORE_DWORDX2_SADDR %4.sub0, %15, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) + %7:vgpr_32 = 
V_AND_B32_e32 %2, %0, implicit $exec + %8:vgpr_32, %9:sreg_64_xexec = V_ADD_CO_U32_e64 %4.sub0, %7, 0, implicit $exec + %10:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 %4.sub1, %0, killed %9, 0, implicit $exec + %12:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %10, %subreg.sub1 + GLOBAL_STORE_DWORDX2_SADDR %4.sub0, %12, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) + +... +--- +name: test3_add_co_sdwa +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: vgpr_32, preferred-register: '' } + - { id: 1, class: sgpr_64, preferred-register: '' } + - { id: 2, class: sreg_32_xm0, preferred-register: '' } + - { id: 3, class: vgpr_32, preferred-register: '' } + - { id: 4, class: vreg_64, preferred-register: '' } + - { id: 5, class: vgpr_32, preferred-register: '' } + - { id: 6, class: sreg_64_xexec, preferred-register: '' } + - { id: 7, class: vgpr_32, preferred-register: '' } + - { id: 8, class: sreg_64_xexec, preferred-register: '' } + - { id: 9, class: vreg_64, preferred-register: '' } +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } + - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: 
+ explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + gdsSize: 0 + dynLDSAlign: 1 + isEntryFunction: false + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + bytesInStackArgArea: 0 + returnsVoid: true + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + queuePtr: { reg: '$sgpr6_sgpr7' } + dispatchID: { reg: '$sgpr10_sgpr11' } + workGroupIDX: { reg: '$sgpr12' } + workGroupIDY: { reg: '$sgpr13' } + workGroupIDZ: { reg: '$sgpr14' } + LDSKernelId: { reg: '$sgpr15' } + implicitArgPtr: { reg: '$sgpr8_sgpr9' } + workItemIDX: { reg: '$vgpr31', mask: 1023 } + workItemIDY: { reg: '$vgpr31', mask: 1047552 } + workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } + mode: + ieee: true + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 + vgprForAGPRCopy: '' +body: | + bb.0: + liveins: $vgpr0, $sgpr0_sgpr1 + + %1:sgpr_64 = COPY $sgpr0_sgpr1 + %0:vgpr_32 = COPY $vgpr0 + %2:sreg_32_xm0 = S_MOV_B32 255 + %3:vgpr_32 = V_AND_B32_e32 %2, %0, implicit $exec + %4:vreg_64 = COPY $sgpr0_sgpr1 + %5:vgpr_32, %6:sreg_64_xexec = V_ADD_CO_U32_e64 %4.sub0, %3, 0, implicit $exec + %7:vgpr_32, %8:sreg_64_xexec = V_ADDC_U32_e64 %4.sub1, %0, killed %6, 0, implicit $exec + %9:vreg_64 = REG_SEQUENCE %5, %subreg.sub0, %8, %subreg.sub1 + GLOBAL_STORE_DWORDX2_SADDR %4.sub0, %9, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) + +... 
+--- +name: test4_add_co_sdwa +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: vgpr_32, preferred-register: '' } + - { id: 1, class: sgpr_64, preferred-register: '' } + - { id: 2, class: sreg_32_xm0, preferred-register: '' } + - { id: 3, class: vgpr_32, preferred-register: '' } + - { id: 4, class: vreg_64, preferred-register: '' } + - { id: 5, class: vgpr_32, preferred-register: '' } + - { id: 6, class: sreg_64_xexec, preferred-register: '' } + - { id: 7, class: vgpr_32, preferred-register: '' } + - { id: 8, class: sreg_64_xexec, preferred-register: '' } + - { id: 9, class: vreg_64, preferred-register: '' } +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } + - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + gdsSize: 0 + dynLDSAlign: 1 + isEntryFunction: false + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + bytesInStackArgArea: 0 + returnsVoid: true + 
argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + queuePtr: { reg: '$sgpr6_sgpr7' } + dispatchID: { reg: '$sgpr10_sgpr11' } + workGroupIDX: { reg: '$sgpr12' } + workGroupIDY: { reg: '$sgpr13' } + workGroupIDZ: { reg: '$sgpr14' } + LDSKernelId: { reg: '$sgpr15' } + implicitArgPtr: { reg: '$sgpr8_sgpr9' } + workItemIDX: { reg: '$vgpr31', mask: 1023 } + workItemIDY: { reg: '$vgpr31', mask: 1047552 } + workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } + mode: + ieee: true + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 + vgprForAGPRCopy: '' +body: | + bb.0: + liveins: $vgpr0, $sgpr0_sgpr1 + + %1:sgpr_64 = COPY $sgpr0_sgpr1 + %0:vgpr_32 = COPY $vgpr0 + %2:sreg_32_xm0 = S_MOV_B32 255 + %3:vgpr_32 = V_AND_B32_e32 %2, %0, implicit $exec + %4:vreg_64 = COPY $sgpr0_sgpr1 + %5:vgpr_32, %6:sreg_64_xexec = V_ADD_CO_U32_e64 %4.sub0, %3, 0, implicit $exec + %7:vgpr_32, %8:sreg_64_xexec = V_ADDC_U32_e64 %4.sub1, %0, %6, 0, implicit $exec + %9:vreg_64 = REG_SEQUENCE %5, %subreg.sub0, %6, %subreg.sub1 + GLOBAL_STORE_DWORDX2_SADDR %4.sub0, %9, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) + +... 
+--- +name: test5_add_co_sdwa +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: vgpr_32, preferred-register: '' } + - { id: 1, class: sgpr_64, preferred-register: '' } + - { id: 2, class: sreg_32_xm0, preferred-register: '' } + - { id: 3, class: vgpr_32, preferred-register: '' } + - { id: 4, class: vreg_64, preferred-register: '' } + - { id: 5, class: vgpr_32, preferred-register: '' } + - { id: 6, class: sreg_64_xexec, preferred-register: '' } + - { id: 7, class: vgpr_32, preferred-register: '' } + - { id: 8, class: sreg_64_xexec, preferred-register: '' } + - { id: 9, class: vreg_64, preferred-register: '' } +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } + - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + gdsSize: 0 + dynLDSAlign: 1 + isEntryFunction: false + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + bytesInStackArgArea: 0 + returnsVoid: true + 
argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + queuePtr: { reg: '$sgpr6_sgpr7' } + dispatchID: { reg: '$sgpr10_sgpr11' } + workGroupIDX: { reg: '$sgpr12' } + workGroupIDY: { reg: '$sgpr13' } + workGroupIDZ: { reg: '$sgpr14' } + LDSKernelId: { reg: '$sgpr15' } + implicitArgPtr: { reg: '$sgpr8_sgpr9' } + workItemIDX: { reg: '$vgpr31', mask: 1023 } + workItemIDY: { reg: '$vgpr31', mask: 1047552 } + workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } + mode: + ieee: true + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 + vgprForAGPRCopy: '' +body: | + bb.0: + liveins: $vgpr0, $sgpr0_sgpr1 + + %1:sgpr_64 = COPY $sgpr0_sgpr1 + %0:vgpr_32 = COPY $vgpr0 + %2:sreg_32_xm0 = S_MOV_B32 255 + %3:vgpr_32 = V_AND_B32_e32 %2, %0, implicit $exec + %4:vreg_64 = COPY $sgpr0_sgpr1 + %5:vgpr_32 = V_ADD_CO_U32_sdwa 0, %4.sub0, 0, %0, 0, 6, 0, 6, 0, implicit-def $vcc, implicit $exec + %7:vgpr_32, %8:sreg_64_xexec = V_ADDC_U32_e64 %4.sub1, %0, $vcc, 0, implicit $exec + %9:vreg_64 = REG_SEQUENCE %5, %subreg.sub0, %7, %subreg.sub1 + GLOBAL_STORE_DWORDX2_SADDR %4.sub0, %9, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) + +... 
+--- +name: test6_add_co_sdwa +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: vgpr_32, preferred-register: '' } + - { id: 1, class: sgpr_64, preferred-register: '' } + - { id: 2, class: sreg_32_xm0, preferred-register: '' } + - { id: 3, class: vgpr_32, preferred-register: '' } + - { id: 4, class: vreg_64, preferred-register: '' } + - { id: 5, class: vgpr_32, preferred-register: '' } + - { id: 6, class: sreg_64_xexec, preferred-register: '' } + - { id: 7, class: vreg_64, preferred-register: '' } +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } + - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + gdsSize: 0 + dynLDSAlign: 1 + isEntryFunction: false + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + bytesInStackArgArea: 0 + returnsVoid: true + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + 
queuePtr: { reg: '$sgpr6_sgpr7' } + dispatchID: { reg: '$sgpr10_sgpr11' } + workGroupIDX: { reg: '$sgpr12' } + workGroupIDY: { reg: '$sgpr13' } + workGroupIDZ: { reg: '$sgpr14' } + LDSKernelId: { reg: '$sgpr15' } + implicitArgPtr: { reg: '$sgpr8_sgpr9' } + workItemIDX: { reg: '$vgpr31', mask: 1023 } + workItemIDY: { reg: '$vgpr31', mask: 1047552 } + workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } + mode: + ieee: true + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 + vgprForAGPRCopy: '' +body: | + bb.0: + liveins: $vgpr0, $sgpr0_sgpr1 + + %1:sgpr_64 = COPY $sgpr0_sgpr1 + %0:vgpr_32 = COPY $vgpr0 + %2:sreg_32_xm0 = S_MOV_B32 255 + %3:vgpr_32 = V_AND_B32_e32 %2, %0, implicit $exec + %4:vreg_64 = COPY $sgpr0_sgpr1 + %5:vgpr_32, %6:sreg_64_xexec = V_ADD_CO_U32_e64 %4.sub0, %3, 0, implicit $exec + %7:vreg_64 = REG_SEQUENCE %5, %subreg.sub0, %3, %subreg.sub1 + GLOBAL_STORE_DWORDX2_SADDR %4.sub0, %7, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) + +... 
+--- +name: test7_add_co_sdwa +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: vgpr_32, preferred-register: '' } + - { id: 1, class: sgpr_64, preferred-register: '' } + - { id: 2, class: sreg_32_xm0, preferred-register: '' } + - { id: 3, class: vgpr_32, preferred-register: '' } + - { id: 4, class: sreg_64_xexec, preferred-register: '' } + - { id: 5, class: vreg_64, preferred-register: '' } + - { id: 6, class: vgpr_32, preferred-register: '' } + - { id: 7, class: sreg_64_xexec, preferred-register: '' } + - { id: 8, class: vreg_64, preferred-register: '' } +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } + - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + gdsSize: 0 + dynLDSAlign: 1 + isEntryFunction: false + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + bytesInStackArgArea: 0 + returnsVoid: true + argumentInfo: + privateSegmentBuffer: { reg: 
'$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + queuePtr: { reg: '$sgpr6_sgpr7' } + dispatchID: { reg: '$sgpr10_sgpr11' } + workGroupIDX: { reg: '$sgpr12' } + workGroupIDY: { reg: '$sgpr13' } + workGroupIDZ: { reg: '$sgpr14' } + LDSKernelId: { reg: '$sgpr15' } + implicitArgPtr: { reg: '$sgpr8_sgpr9' } + workItemIDX: { reg: '$vgpr31', mask: 1023 } + workItemIDY: { reg: '$vgpr31', mask: 1047552 } + workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } + mode: + ieee: true + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 + vgprForAGPRCopy: '' +body: | + bb.0: + liveins: $vgpr0, $sgpr0_sgpr1 + + %1:sgpr_64 = COPY $sgpr0_sgpr1 + %0:vgpr_32 = COPY $vgpr0 + %2:sreg_32_xm0 = S_MOV_B32 255 + %3:vgpr_32 = V_AND_B32_e32 %2, %0, implicit $exec + %4:sreg_64_xexec = COPY $sgpr0_sgpr1 + %5:vreg_64 = COPY $sgpr0_sgpr1 + %6:vgpr_32, %7:sreg_64_xexec = V_ADDC_U32_e64 %5.sub1, %0, %4, 0, implicit $exec + %8:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %3, %subreg.sub1 + GLOBAL_STORE_DWORDX2_SADDR %5.sub0, %8, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) + +... 
+--- +name: test8_add_co_sdwa +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: vgpr_32, preferred-register: '' } + - { id: 1, class: sgpr_64, preferred-register: '' } + - { id: 2, class: sreg_32_xm0, preferred-register: '' } + - { id: 3, class: vgpr_32, preferred-register: '' } + - { id: 4, class: vreg_64, preferred-register: '' } + - { id: 5, class: vgpr_32, preferred-register: '' } + - { id: 6, class: sreg_64_xexec, preferred-register: '' } + - { id: 7, class: vgpr_32, preferred-register: '' } + - { id: 8, class: sreg_64_xexec, preferred-register: '' } + - { id: 9, class: vreg_64, preferred-register: '' } + - { id: 10, class: vreg_64, preferred-register: '' } +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } + - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + gdsSize: 0 + dynLDSAlign: 1 + isEntryFunction: false + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' 
+ bytesInStackArgArea: 0 + returnsVoid: true + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + queuePtr: { reg: '$sgpr6_sgpr7' } + dispatchID: { reg: '$sgpr10_sgpr11' } + workGroupIDX: { reg: '$sgpr12' } + workGroupIDY: { reg: '$sgpr13' } + workGroupIDZ: { reg: '$sgpr14' } + LDSKernelId: { reg: '$sgpr15' } + implicitArgPtr: { reg: '$sgpr8_sgpr9' } + workItemIDX: { reg: '$vgpr31', mask: 1023 } + workItemIDY: { reg: '$vgpr31', mask: 1047552 } + workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } + mode: + ieee: true + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 + vgprForAGPRCopy: '' +body: | + bb.0: + liveins: $vgpr0, $sgpr0_sgpr1 + + %1:sgpr_64 = COPY $sgpr0_sgpr1 + %0:vgpr_32 = COPY $vgpr0 + %2:sreg_32_xm0 = S_MOV_B32 255 + %3:vgpr_32 = V_AND_B32_e32 %2, %0, implicit $exec + %4:vreg_64 = COPY $sgpr0_sgpr1 + %5:vgpr_32, %6:sreg_64_xexec = V_ADD_CO_U32_e64 %4.sub0, %3, 0, implicit $exec + $vcc = COPY %4 + %7:vgpr_32, %8:sreg_64_xexec = V_ADDC_U32_e64 %4.sub1, %0, %6, 0, implicit $exec + %9:vreg_64 = COPY $vcc + %10:vreg_64 = REG_SEQUENCE %5, %subreg.sub0, %7, %subreg.sub1 + GLOBAL_STORE_DWORDX2_SADDR %9.sub0, %10, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) + +... 
+--- +name: test9_add_co_sdwa +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: vgpr_32, preferred-register: '' } + - { id: 1, class: sgpr_64, preferred-register: '' } + - { id: 2, class: sreg_32_xm0, preferred-register: '' } + - { id: 3, class: vreg_64, preferred-register: '' } + - { id: 4, class: vgpr_32, preferred-register: '' } + - { id: 5, class: vgpr_32, preferred-register: '' } + - { id: 6, class: sreg_64_xexec, preferred-register: '' } + - { id: 7, class: vgpr_32, preferred-register: '' } + - { id: 8, class: sreg_64_xexec, preferred-register: '' } + - { id: 9, class: vreg_64, preferred-register: '' } + - { id: 10, class: vreg_64, preferred-register: '' } +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } + - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + gdsSize: 0 + dynLDSAlign: 1 + isEntryFunction: false + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' 
+ bytesInStackArgArea: 0 + returnsVoid: true + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + queuePtr: { reg: '$sgpr6_sgpr7' } + dispatchID: { reg: '$sgpr10_sgpr11' } + workGroupIDX: { reg: '$sgpr12' } + workGroupIDY: { reg: '$sgpr13' } + workGroupIDZ: { reg: '$sgpr14' } + LDSKernelId: { reg: '$sgpr15' } + implicitArgPtr: { reg: '$sgpr8_sgpr9' } + workItemIDX: { reg: '$vgpr31', mask: 1023 } + workItemIDY: { reg: '$vgpr31', mask: 1047552 } + workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } + mode: + ieee: true + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 + vgprForAGPRCopy: '' +body: | + bb.0: + liveins: $vgpr0, $sgpr0_sgpr1 + + %1:sgpr_64 = COPY $sgpr0_sgpr1 + %0:vgpr_32 = COPY $vgpr0 + %2:sreg_32_xm0 = S_MOV_B32 255 + %3:vreg_64 = COPY $sgpr0_sgpr1 + $vcc = COPY %3 + %4:vgpr_32 = V_AND_B32_e32 %2, %0, implicit $exec + %5:vgpr_32, %6:sreg_64_xexec = V_ADD_CO_U32_e64 %3.sub0, %4, 0, implicit $exec + %7:vgpr_32, %8:sreg_64_xexec = V_ADDC_U32_e64 %3.sub1, %0, %6, 0, implicit $exec + %9:vreg_64 = COPY $vcc + %10:vreg_64 = REG_SEQUENCE %5, %subreg.sub0, %7, %subreg.sub1 + GLOBAL_STORE_DWORDX2_SADDR %9.sub0, %10, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) + +... 
+--- +name: test10_add_co_sdwa +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: vgpr_32, preferred-register: '' } + - { id: 1, class: sgpr_64, preferred-register: '' } + - { id: 2, class: sreg_32_xm0, preferred-register: '' } + - { id: 3, class: vreg_64, preferred-register: '' } + - { id: 4, class: vgpr_32, preferred-register: '' } + - { id: 5, class: vgpr_32, preferred-register: '' } + - { id: 6, class: sreg_64_xexec, preferred-register: '' } + - { id: 7, class: vgpr_32, preferred-register: '' } + - { id: 8, class: vreg_64, preferred-register: '' } + - { id: 9, class: vgpr_32, preferred-register: '' } + - { id: 10, class: sreg_64_xexec, preferred-register: '' } + - { id: 11, class: vreg_64, preferred-register: '' } +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } + - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + gdsSize: 0 + dynLDSAlign: 1 + isEntryFunction: false + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + 
frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + bytesInStackArgArea: 0 + returnsVoid: true + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + queuePtr: { reg: '$sgpr6_sgpr7' } + dispatchID: { reg: '$sgpr10_sgpr11' } + workGroupIDX: { reg: '$sgpr12' } + workGroupIDY: { reg: '$sgpr13' } + workGroupIDZ: { reg: '$sgpr14' } + LDSKernelId: { reg: '$sgpr15' } + implicitArgPtr: { reg: '$sgpr8_sgpr9' } + workItemIDX: { reg: '$vgpr31', mask: 1023 } + workItemIDY: { reg: '$vgpr31', mask: 1047552 } + workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } + mode: + ieee: true + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 + vgprForAGPRCopy: '' +body: | + bb.0: + liveins: $vgpr0, $sgpr0_sgpr1 + + %1:sgpr_64 = COPY $sgpr0_sgpr1 + %0:vgpr_32 = COPY $vgpr0 + %2:sreg_32_xm0 = S_MOV_B32 255 + %3:vreg_64 = COPY $sgpr0_sgpr1 + $vcc_lo = COPY %3.sub0 + %4:vgpr_32 = V_AND_B32_e32 %2, %0, implicit $exec + %5:vgpr_32, %6:sreg_64_xexec = V_ADD_CO_U32_e64 %3.sub0, %4, 0, implicit $exec + %7:vgpr_32 = COPY $vcc_lo + %8:vreg_64 = REG_SEQUENCE %7, %subreg.sub0, %4, %subreg.sub1 + %9:vgpr_32, %10:sreg_64_xexec = V_ADDC_U32_e64 %3.sub1, %0, %6, 0, implicit $exec + %11:vreg_64 = REG_SEQUENCE %5, %subreg.sub0, %9, %subreg.sub1 + GLOBAL_STORE_DWORDX2_SADDR %8.sub0, %11, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) + +... 
+--- +name: test11_add_co_sdwa +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: vgpr_32, preferred-register: '' } + - { id: 1, class: sgpr_64, preferred-register: '' } + - { id: 2, class: sreg_32_xm0, preferred-register: '' } + - { id: 3, class: vreg_64, preferred-register: '' } + - { id: 4, class: vgpr_32, preferred-register: '' } + - { id: 5, class: vgpr_32, preferred-register: '' } + - { id: 6, class: sreg_64_xexec, preferred-register: '' } + - { id: 7, class: vgpr_32, preferred-register: '' } + - { id: 8, class: vreg_64, preferred-register: '' } + - { id: 9, class: vgpr_32, preferred-register: '' } + - { id: 10, class: sreg_64_xexec, preferred-register: '' } + - { id: 11, class: vreg_64, preferred-register: '' } +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } + - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + gdsSize: 0 + dynLDSAlign: 1 + isEntryFunction: false + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + 
frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + bytesInStackArgArea: 0 + returnsVoid: true + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + queuePtr: { reg: '$sgpr6_sgpr7' } + dispatchID: { reg: '$sgpr10_sgpr11' } + workGroupIDX: { reg: '$sgpr12' } + workGroupIDY: { reg: '$sgpr13' } + workGroupIDZ: { reg: '$sgpr14' } + LDSKernelId: { reg: '$sgpr15' } + implicitArgPtr: { reg: '$sgpr8_sgpr9' } + workItemIDX: { reg: '$vgpr31', mask: 1023 } + workItemIDY: { reg: '$vgpr31', mask: 1047552 } + workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } + mode: + ieee: true + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 + vgprForAGPRCopy: '' +body: | + bb.0: + liveins: $vgpr0, $sgpr0_sgpr1 + + %1:sgpr_64 = COPY $sgpr0_sgpr1 + %0:vgpr_32 = COPY $vgpr0 + %2:sreg_32_xm0 = S_MOV_B32 255 + %3:vreg_64 = COPY $sgpr0_sgpr1 + $vcc_hi = COPY %3.sub0 + %4:vgpr_32 = V_AND_B32_e32 %2, %0, implicit $exec + %5:vgpr_32, %6:sreg_64_xexec = V_ADD_CO_U32_e64 %3.sub0, %4, 0, implicit $exec + %7:vgpr_32 = COPY $vcc_hi + %8:vreg_64 = REG_SEQUENCE %7, %subreg.sub0, %4, %subreg.sub1 + %9:vgpr_32, %10:sreg_64_xexec = V_ADDC_U32_e64 %3.sub1, %0, %6, 0, implicit $exec + %11:vreg_64 = REG_SEQUENCE %5, %subreg.sub0, %9, %subreg.sub1 + GLOBAL_STORE_DWORDX2_SADDR %8.sub0, %11, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) + +... 
+--- +name: test12_add_co_sdwa +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: vgpr_32, preferred-register: '' } + - { id: 1, class: sgpr_64, preferred-register: '' } + - { id: 2, class: sreg_32_xm0, preferred-register: '' } + - { id: 3, class: vreg_64, preferred-register: '' } + - { id: 4, class: vgpr_32, preferred-register: '' } + - { id: 5, class: vgpr_32, preferred-register: '' } + - { id: 6, class: sreg_64_xexec, preferred-register: '' } + - { id: 7, class: vreg_64, preferred-register: '' } + - { id: 8, class: vgpr_32, preferred-register: '' } + - { id: 9, class: sreg_64_xexec, preferred-register: '' } + - { id: 10, class: vreg_64, preferred-register: '' } +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } + - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + gdsSize: 0 + dynLDSAlign: 1 + isEntryFunction: false + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' 
+ bytesInStackArgArea: 0 + returnsVoid: true + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + queuePtr: { reg: '$sgpr6_sgpr7' } + dispatchID: { reg: '$sgpr10_sgpr11' } + workGroupIDX: { reg: '$sgpr12' } + workGroupIDY: { reg: '$sgpr13' } + workGroupIDZ: { reg: '$sgpr14' } + LDSKernelId: { reg: '$sgpr15' } + implicitArgPtr: { reg: '$sgpr8_sgpr9' } + workItemIDX: { reg: '$vgpr31', mask: 1023 } + workItemIDY: { reg: '$vgpr31', mask: 1047552 } + workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } + mode: + ieee: true + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 + vgprForAGPRCopy: '' +body: | + bb.0: + liveins: $vgpr0, $sgpr0_sgpr1 + + %1:sgpr_64 = COPY $sgpr0_sgpr1 + %0:vgpr_32 = COPY $vgpr0 + %2:sreg_32_xm0 = S_MOV_B32 255 + %3:vreg_64 = COPY $sgpr0_sgpr1 + %4:vgpr_32 = V_AND_B32_e32 %2, %0, implicit $exec + %5:vgpr_32, %6:sreg_64_xexec = V_ADD_CO_U32_e64 %3.sub0, %4, 0, implicit $exec + $vcc = COPY %3 + %7:vreg_64 = COPY killed $vcc + %8:vgpr_32, %9:sreg_64_xexec = V_ADDC_U32_e64 %3.sub1, %0, %6, 0, implicit $exec + %10:vreg_64 = REG_SEQUENCE %5, %subreg.sub0, %8, %subreg.sub1 + GLOBAL_STORE_DWORDX2_SADDR %7.sub0, %10, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) + +...