Index: llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1100,6 +1100,10 @@
     return true;
   }
 
+  // Do not attempt to relax schedule even more if we are already spilling.
+  if (isRegionWithExcessRP())
+    return false;
+
   LLVM_DEBUG(
       dbgs() << "\n\t *** In shouldRevertScheduling ***\n"
Index: llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir
@@ -0,0 +1,731 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -start-before=machine-scheduler -stop-after=greedy,1 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+--- |
+  define amdgpu_kernel void @no_sched_metric_due_to_spills() #0 {
+    ret void
+  }
+
+  attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
+...
+
+# GCN-LABEL: name: no_sched_metric_due_to_spills
+# GCN-NOT: SI_SPILL_
+# GCN: S_ENDPGM
+---
+name: no_sched_metric_due_to_spills
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg: '$sgpr32'
+  occupancy: 4
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0_sgpr1, $sgpr15
+
+    %15:sgpr_32 = COPY $sgpr15
+    %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
+    %0:vgpr_32(s32) = COPY $vgpr0
+    %2036:sgpr_128 = S_LOAD_DWORDX4_IMM %1(p4), 0, 0 :: (dereferenceable invariant load (s128), addrspace 4)
+    undef %887.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1(p4), 16, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    %19:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1(p4), 32, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+    %20:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1(p4), 64, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+    %21:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1(p4), 84, 0 :: (dereferenceable invariant load (s32), addrspace 4)
+    %22:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1(p4), 112, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+    %23:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1(p4), 128, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+    %24:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1(p4), 176, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+    %25:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1(p4), 192, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+    %26:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1(p4), 216, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    %30:sreg_32 = S_ADD_I32 %26.sub0, 127, implicit-def dead $scc
+    %32:sreg_32 = S_ASHR_I32 %30, 31, implicit-def dead $scc
+    %34:sreg_32 = S_LSHR_B32 %32, 25, implicit-def dead $scc
+    %35:sreg_32 = S_ADD_I32 %30, %34, implicit-def dead $scc
+    %37:sreg_32 = S_ASHR_I32 %35, 7, implicit-def dead $scc
+    %39:sreg_32 = S_ADD_I32 %26.sub1, 255, implicit-def dead $scc
+    %40:sreg_32 = S_ASHR_I32 %39, 31, implicit-def dead $scc
+    %42:sreg_32 = S_LSHR_B32 %40, 24, implicit-def dead $scc
+    %43:sreg_32 = S_ADD_I32 %39, %42, implicit-def dead $scc
+    %45:sreg_32 = S_ASHR_I32 %43, 8, implicit-def dead $scc
+    %46:sreg_32 = nsw S_MUL_I32 %45, %37
+    %47:sreg_32 = S_ASHR_I32 %15, 31, implicit-def dead $scc
+    %48:sreg_32 = S_ASHR_I32 %46, 31, implicit-def dead $scc
+    %49:sreg_32 = S_ADD_I32 %15, %47, implicit-def dead $scc
+    %50:sreg_32 = S_ADD_I32 %46, %48, implicit-def dead $scc
+    %51:sreg_32 = S_XOR_B32 %49, %47, implicit-def dead $scc
+    %52:sreg_32 = S_XOR_B32 %50, %48,
implicit-def dead $scc + %53:vgpr_32 = V_CVT_F32_U32_e64 %52, 0, 0, implicit $mode, implicit $exec + %54:vgpr_32 = V_RCP_IFLAG_F32_e64 0, %53, 0, 0, implicit $mode, implicit $exec + %56:vgpr_32 = V_MUL_F32_e64 0, 1333788670, 0, %54, 0, 0, implicit $mode, implicit $exec + %57:vgpr_32 = V_CVT_U32_F32_e64 0, %56, 0, 0, implicit $mode, implicit $exec + undef %330.sub0:sgpr_256 = S_MOV_B32 0 + %59:sreg_32 = S_SUB_I32 0, %52, implicit-def dead $scc + %61:sreg_32 = V_READFIRSTLANE_B32 %57, implicit $exec + %60:sreg_32 = S_MUL_I32 %59, %61 + %62:sreg_32 = S_MUL_HI_U32 %61, %60 + %64:sreg_32 = S_ADD_I32 %61, %62, implicit-def dead $scc + %66:sreg_32 = S_MUL_HI_U32 %51, %64 + %67:sreg_32 = S_MUL_I32 %66, %52 + %68:sreg_32 = S_SUB_I32 %51, %67, implicit-def dead $scc + %69:sreg_32 = S_SUB_I32 %68, %52, implicit-def dead $scc + S_CMP_GE_U32 %68, %52, implicit-def $scc + %70:sreg_32 = S_CSELECT_B32 %69, %68, implicit killed $scc + %71:sreg_32 = S_SUB_I32 %70, %52, implicit-def dead $scc + S_CMP_GE_U32 %70, %52, implicit-def $scc + %72:sreg_32 = S_CSELECT_B32 %71, %70, implicit killed $scc + %73:sreg_32 = S_XOR_B32 %72, %47, implicit-def dead $scc + %74:sreg_32 = S_SUB_I32 %73, %47, implicit-def dead $scc + %77:sreg_32 = S_ASHR_I32 %74, 31, implicit-def dead $scc + %78:sreg_32 = S_ASHR_I32 %45, 31, implicit-def dead $scc + %79:sreg_32 = S_XOR_B32 %77, %78, implicit-def dead $scc + %80:sreg_32 = S_ADD_I32 %74, %77, implicit-def dead $scc + %81:sreg_32 = S_ADD_I32 %45, %78, implicit-def dead $scc + %82:sreg_32 = S_XOR_B32 %80, %77, implicit-def dead $scc + %83:sreg_32 = S_XOR_B32 %81, %78, implicit-def dead $scc + %84:vgpr_32 = V_CVT_F32_U32_e64 %83, 0, 0, implicit $mode, implicit $exec + %85:vgpr_32 = V_RCP_IFLAG_F32_e64 0, %84, 0, 0, implicit $mode, implicit $exec + %86:vgpr_32 = V_MUL_F32_e64 0, 1333788670, 0, %85, 0, 0, implicit $mode, implicit $exec + %87:vgpr_32 = V_CVT_U32_F32_e64 0, %86, 0, 0, implicit $mode, implicit $exec + %88:sreg_32 = S_SUB_I32 0, %83, implicit-def dead $scc + %90:sreg_32 = V_READFIRSTLANE_B32 %87, implicit $exec + %89:sreg_32 = S_MUL_I32 %88, %90 + %91:sreg_32 = S_MUL_HI_U32 %90, %89 + %93:sreg_32 = S_ADD_I32 %90, %91, implicit-def dead $scc + %95:sreg_32 = S_MUL_HI_U32 %82, %93 + %96:sreg_32 = S_MUL_I32 %95, %83 + %97:sreg_32 = S_SUB_I32 %82, %96, implicit-def dead $scc + %99:sreg_32 = S_ADD_I32 %95, 1, implicit-def dead $scc + %100:sreg_32 = S_SUB_I32 %97, %83, implicit-def dead $scc + S_CMP_GE_U32 %97, %83, implicit-def $scc + %101:sreg_32 = S_CSELECT_B32 %99, %95, implicit $scc + %102:sreg_32 = S_CSELECT_B32 %100, %97, implicit killed $scc + %103:sreg_32 = S_ADD_I32 %101, 1, implicit-def dead $scc + S_CMP_GE_U32 %102, %83, implicit-def $scc + %104:sreg_32 = S_CSELECT_B32 %103, %101, implicit killed $scc + %105:sreg_32 = S_XOR_B32 %104, %79, implicit-def dead $scc + %106:sreg_32 = S_SUB_I32 %105, %79, implicit-def dead $scc + %107:sreg_32 = S_ASHR_I32 %35, 31, implicit-def dead $scc + %108:sreg_32 = S_ASHR_I32 %25, 31, implicit-def dead $scc + %109:sreg_32 = S_ADD_I32 %37, %107, implicit-def dead $scc + %110:sreg_32 = S_ADD_I32 %25, %108, implicit-def dead $scc + %111:sreg_32 = S_XOR_B32 %109, %107, implicit-def dead $scc + %112:sreg_32 = S_XOR_B32 %110, %108, implicit-def dead $scc + %113:vgpr_32 = V_CVT_F32_U32_e64 %112, 0, 0, implicit $mode, implicit $exec + %114:vgpr_32 = V_RCP_IFLAG_F32_e64 0, %113, 0, 0, implicit $mode, implicit $exec + %115:vgpr_32 = V_MUL_F32_e64 0, 1333788670, 0, %114, 0, 0, implicit $mode, implicit $exec + %116:vgpr_32 = V_CVT_U32_F32_e64 0, 
%115, 0, 0, implicit $mode, implicit $exec + %117:sreg_32 = S_SUB_I32 0, %112, implicit-def dead $scc + %119:sreg_32 = V_READFIRSTLANE_B32 %116, implicit $exec + %118:sreg_32 = S_MUL_I32 %117, %119 + %120:sreg_32 = S_MUL_HI_U32 %119, %118 + %122:sreg_32 = S_ADD_I32 %119, %120, implicit-def dead $scc + %124:sreg_32 = S_MUL_HI_U32 %111, %122 + %125:sreg_32 = S_MUL_I32 %124, %112 + %126:sreg_32 = S_SUB_I32 %111, %125, implicit-def dead $scc + %127:sreg_32 = S_SUB_I32 %126, %112, implicit-def dead $scc + S_CMP_GE_U32 %126, %112, implicit-def $scc + %128:sreg_32 = S_CSELECT_B32 %127, %126, implicit killed $scc + %129:sreg_32 = S_SUB_I32 %128, %112, implicit-def dead $scc + S_CMP_GE_U32 %128, %112, implicit-def $scc + %130:sreg_32 = S_CSELECT_B32 %129, %128, implicit killed $scc + %131:sreg_32 = S_XOR_B32 %130, %107, implicit-def dead $scc + %132:sreg_32 = S_SUB_I32 %131, %107, implicit-def dead $scc + %133:sreg_32 = nsw S_SUB_I32 %37, %132, implicit-def dead $scc + S_CMP_LT_I32 %106, %133, implicit-def $scc + %134:sreg_32 = S_CSELECT_B32 %25, %132, implicit killed $scc + %135:sreg_32 = S_MUL_I32 %106, %45 + %136:sreg_32 = S_SUB_I32 %74, %135, implicit-def dead $scc + %137:sreg_32 = S_ASHR_I32 %106, 31, implicit-def dead $scc + %138:sreg_32 = S_ADD_I32 %106, %137, implicit-def dead $scc + %139:sreg_32 = S_XOR_B32 %138, %137, implicit-def dead $scc + %140:sreg_32 = S_MUL_HI_U32 %139, %122 + %141:sreg_32 = S_MUL_I32 %140, %112 + %142:sreg_32 = S_SUB_I32 %139, %141, implicit-def dead $scc + %143:sreg_32 = S_SUB_I32 %142, %112, implicit-def dead $scc + S_CMP_GE_U32 %142, %112, implicit-def $scc + %144:sreg_32 = S_CSELECT_B32 %143, %142, implicit killed $scc + %145:sreg_32 = S_SUB_I32 %144, %112, implicit-def dead $scc + S_CMP_GE_U32 %144, %112, implicit-def $scc + %146:sreg_32 = S_CSELECT_B32 %145, %144, implicit killed $scc + %147:sreg_32 = S_XOR_B32 %146, %137, implicit-def dead $scc + %148:sreg_32 = S_SUB_I32 %147, %137, implicit-def dead $scc + %149:sreg_32 = nsw S_MUL_I32 %148, %45 + %150:sreg_32 = nsw S_ADD_I32 %149, %136, implicit-def dead $scc + %153:sreg_32 = S_ASHR_I32 %150, 31, implicit-def dead $scc + %154:sreg_32 = S_ASHR_I32 %134, 31, implicit-def dead $scc + %155:sreg_32 = S_XOR_B32 %153, %154, implicit-def dead $scc + %156:sreg_32 = S_ADD_I32 %150, %153, implicit-def dead $scc + %157:sreg_32 = S_ADD_I32 %134, %154, implicit-def dead $scc + %158:sreg_32 = S_XOR_B32 %156, %153, implicit-def dead $scc + %159:sreg_32 = S_XOR_B32 %157, %154, implicit-def dead $scc + %160:vgpr_32 = V_CVT_F32_U32_e64 %159, 0, 0, implicit $mode, implicit $exec + %161:vgpr_32 = V_RCP_IFLAG_F32_e64 0, %160, 0, 0, implicit $mode, implicit $exec + %162:vgpr_32 = V_MUL_F32_e64 0, 1333788670, 0, %161, 0, 0, implicit $mode, implicit $exec + %163:vgpr_32 = V_CVT_U32_F32_e64 0, %162, 0, 0, implicit $mode, implicit $exec + %164:sreg_32 = S_SUB_I32 0, %159, implicit-def dead $scc + %166:sreg_32 = V_READFIRSTLANE_B32 %163, implicit $exec + %165:sreg_32 = S_MUL_I32 %164, %166 + %167:sreg_32 = S_MUL_HI_U32 %166, %165 + %169:sreg_32 = S_ADD_I32 %166, %167, implicit-def dead $scc + %171:sreg_32 = S_MUL_HI_U32 %158, %169 + %172:sreg_32 = S_MUL_I32 %171, %159 + %173:sreg_32 = S_SUB_I32 %158, %172, implicit-def dead $scc + %174:sreg_32 = S_ADD_I32 %171, 1, implicit-def dead $scc + %175:sreg_32 = S_SUB_I32 %173, %159, implicit-def dead $scc + S_CMP_GE_U32 %173, %159, implicit-def $scc + %176:sreg_32 = S_CSELECT_B32 %174, %171, implicit $scc + %177:sreg_32 = S_CSELECT_B32 %175, %173, implicit killed $scc + %178:sreg_32 = 
S_ADD_I32 %176, 1, implicit-def dead $scc + S_CMP_GE_U32 %177, %159, implicit-def $scc + %179:sreg_32 = S_CSELECT_B32 %178, %176, implicit killed $scc + %180:sreg_32 = S_XOR_B32 %179, %155, implicit-def dead $scc + %181:sreg_32 = S_SUB_I32 %180, %155, implicit-def dead $scc + %182:sreg_32 = S_MUL_I32 %181, %134 + %183:sreg_32 = S_SUB_I32 %150, %182, implicit-def dead $scc + %184:sreg_32 = nsw S_SUB_I32 %106, %148, implicit-def dead $scc + %185:sreg_32 = S_ADD_I32 %184, %183, implicit-def dead $scc + %186:sreg_32 = S_LSHL_B32 %185, 7, implicit-def dead $scc + %189:sreg_32 = nsw S_LSHL_B32 %181, 8, implicit-def dead $scc + %192:vgpr_32 = nuw nsw V_LSHLREV_B32_e64 1, %0(s32), implicit $exec + %194:vgpr_32 = V_AND_B32_e64 6, %192, implicit $exec + %195:vgpr_32 = V_LSHRREV_B32_e64 1, %0(s32), implicit $exec + %197:vgpr_32 = V_AND_B32_e64 126, %195, implicit $exec + %198:vgpr_32 = nsw V_ADD_U32_e64 %186, %197, 0, implicit $exec + undef %1852.sub0:vreg_64 = nuw nsw V_LSHLREV_B32_e64 3, %194, implicit $exec + early-clobber %204:vreg_64, $sgpr_null = V_MAD_U64_U32_gfx11_e64 %198, %19, %1852, 0, implicit $exec + %208:vgpr_32 = V_MUL_U32_U24_e64 1032, %194, 0, implicit $exec + %209:vgpr_32 = nuw nsw V_LSHLREV_B32_e64 3, %197, implicit $exec + %211:vgpr_32 = V_AND_B32_e64 252, %0(s32), implicit $exec + %212:vgpr_32 = nsw V_ADD_U32_e64 %189, %211, 0, implicit $exec + early-clobber %213:vreg_64, $sgpr_null = V_MAD_U64_U32_gfx11_e64 %212, %21, %1852, 0, implicit $exec + %217:vgpr_32 = V_MUL_U32_U24_e64 2056, %194, 0, implicit $exec + %218:vgpr_32 = nuw nsw V_LSHLREV_B32_e64 3, %211, implicit $exec + %219:vgpr_32 = nuw nsw V_LSHLREV_B32_e64 3, %0(s32), implicit $exec + %220:vgpr_32 = V_BFE_U32_e64 %0(s32), 1, 3, implicit $exec + %221:vgpr_32 = V_AND_OR_B32_e64 %219, 8, %220, implicit $exec + %223:vgpr_32 = V_AND_B32_e64 128, %0(s32), implicit $exec + %225:vgpr_32 = V_AND_B32_e64 15, %0(s32), implicit $exec + %227:vgpr_32 = V_AND_OR_B32_e64 %195, 48, %225, implicit $exec + undef %245.sub2:sgpr_128 = S_LSHL_B32 %20, 1, implicit-def dead $scc + %245.sub3:sgpr_128 = S_MOV_B32 268566528 + %245.sub0:sgpr_128 = COPY %2036.sub0 + %245.sub1:sgpr_128 = COPY %2036.sub1 + %233:vgpr_32 = V_LSHLREV_B32_e64 1, %204.sub0, implicit $exec + %234:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %233, %245, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + %235:vgpr_32 = V_ADD_U32_e64 8, %204.sub0, 0, implicit $exec + %236:vgpr_32 = V_LSHLREV_B32_e64 1, %235, implicit $exec + %237:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %236, %245, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + %238:vgpr_32 = V_ADD_LSHL_U32_e64 %235, %19, 1, implicit $exec + %239:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %238, %245, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + %240:vgpr_32 = V_ADD_LSHL_U32_e64 %204.sub0, %19, 1, implicit $exec + %241:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %240, %245, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + %245.sub2:sgpr_128 = S_LSHL_B32 %22, 1, implicit-def dead $scc + %245.sub0:sgpr_128 = COPY %2036.sub2 + %245.sub1:sgpr_128 = COPY %2036.sub3 + %246:vgpr_32 = V_LSHLREV_B32_e64 1, %213.sub0, implicit $exec + %247:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %246, %245, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + %248:vgpr_32 = V_ADD_U32_e64 8, %213.sub0, 0, implicit $exec + %249:vgpr_32 = V_LSHLREV_B32_e64 1, %248, implicit $exec + %250:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFEN %249, %245, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + %251:vgpr_32 = V_ADD_LSHL_U32_e64 %248, %21, 1, implicit $exec + %252:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %251, %245, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + %253:vgpr_32 = V_ADD_U32_e64 %21, %213.sub0, 0, implicit $exec + %254:vgpr_32 = V_LSHLREV_B32_e64 1, %253, implicit $exec + %255:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %254, %245, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + %256:vgpr_32 = V_ADD_U32_e64 %21, %253, 0, implicit $exec + %257:vgpr_32 = V_LSHLREV_B32_e64 1, %256, implicit $exec + %258:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %257, %245, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + %259:vgpr_32 = V_ADD_U32_e64 8, %256, 0, implicit $exec + %260:vgpr_32 = V_LSHLREV_B32_e64 1, %259, implicit $exec + %261:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %260, %245, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + %262:vgpr_32 = V_ADD_LSHL_U32_e64 %259, %21, 1, implicit $exec + %263:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %262, %245, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + %264:vgpr_32 = V_ADD_LSHL_U32_e64 %256, %21, 1, implicit $exec + %265:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %264, %245, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + %266:vgpr_32 = V_ADD_LSHL_U32_e64 %208, %209, 1, implicit $exec + DS_WRITE_B128_gfx9 %266, %234, 0, 0, implicit $exec :: (store (s128), addrspace 3) + DS_WRITE_B128_gfx9 %266, %241, 16, 0, implicit $exec :: (store (s128), addrspace 3) + DS_WRITE_B128_gfx9 %266, %239, 2080, 0, implicit $exec :: (store (s128), addrspace 3) + DS_WRITE_B128_gfx9 %266, %237, 2064, 0, implicit $exec :: (store (s128), addrspace 3) + %267:vgpr_32 = V_ADD_LSHL_U32_e64 %217, %218, 1, implicit $exec + DS_WRITE_B128_gfx9 %267, %247, 16496, 0, implicit $exec :: (store (s128), addrspace 3) + DS_WRITE_B128_gfx9 %267, %255, 16512, 0, implicit $exec :: (store (s128), addrspace 3) + DS_WRITE_B128_gfx9 %267, %258, 16528, 0, implicit $exec :: (store (s128), addrspace 3) + DS_WRITE_B128_gfx9 %267, %265, 16544, 0, implicit $exec :: (store (s128), addrspace 3) + DS_WRITE_B128_gfx9 %267, %263, 20656, 0, implicit $exec :: (store (s128), addrspace 3) + DS_WRITE_B128_gfx9 %267, %261, 20640, 0, implicit $exec :: (store (s128), addrspace 3) + DS_WRITE_B128_gfx9 %267, %252, 20624, 0, implicit $exec :: (store (s128), addrspace 3) + DS_WRITE_B128_gfx9 %267, %250, 20608, 0, implicit $exec :: (store (s128), addrspace 3) + %268:vgpr_32 = V_LSHLREV_B32_e64 1, %223, implicit $exec + %270:vgpr_32 = V_LSHL_OR_B32_e64 %221, 4, %268, implicit $exec + undef %1946.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %270, 0, 0, implicit $exec :: (load (s128), addrspace 3) + %1946.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %270, 2064, 0, implicit $exec :: (load (s128), addrspace 3) + %273:vgpr_32 = V_LSHLREV_B32_e64 4, %227, implicit $exec + undef %2010.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %273, 16496, 0, implicit $exec :: (load (s128), addrspace 3) + %2010.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %273, 20608, 0, implicit $exec :: (load (s128), addrspace 3) + undef %1824.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %273, 17520, 0, implicit $exec :: (load (s128), addrspace 3) + %1824.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %273, 21632, 0, 
implicit $exec :: (load (s128), addrspace 3) + undef %1958.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %273, 18544, 0, implicit $exec :: (load (s128), addrspace 3) + %1958.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %273, 22656, 0, implicit $exec :: (load (s128), addrspace 3) + undef %1796.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %273, 19568, 0, implicit $exec :: (load (s128), addrspace 3) + %1796.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %273, 23680, 0, implicit $exec :: (load (s128), addrspace 3) + undef %1906.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %270, 512, 0, implicit $exec :: (load (s128), addrspace 3) + %1906.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %270, 2576, 0, implicit $exec :: (load (s128), addrspace 3) + %330.sub1:sgpr_256 = COPY %330.sub0 + %330.sub2:sgpr_256 = COPY %330.sub0 + %330.sub3:sgpr_256 = COPY %330.sub0 + %330.sub4:sgpr_256 = COPY %330.sub0 + %330.sub5:sgpr_256 = COPY %330.sub0 + %330.sub6:sgpr_256 = COPY %330.sub0 + %330.sub7:sgpr_256 = COPY %330.sub0 + %822:vreg_256 = COPY %330 + early-clobber %531:vreg_256 = V_WMMA_F32_16X16X16_F16_threeaddr_w32 8, %1906, 8, %2010, 8, %822, 0, 0, implicit $exec, implicit $exec + early-clobber %534:vreg_256 = V_WMMA_F32_16X16X16_F16_threeaddr_w32 8, %1906, 8, %1958, 8, %822, 0, 0, implicit $exec, implicit $exec + undef %1768.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %270, 1024, 0, implicit $exec :: (load (s128), addrspace 3) + %1768.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %270, 3088, 0, implicit $exec :: (load (s128), addrspace 3) + early-clobber %548:vreg_256 = V_WMMA_F32_16X16X16_F16_threeaddr_w32 8, %1768, 8, %2010, 8, %822, 0, 0, implicit $exec, implicit $exec + early-clobber %551:vreg_256 = V_WMMA_F32_16X16X16_F16_threeaddr_w32 8, %1768, 8, %1958, 8, %822, 0, 0, implicit $exec, implicit $exec + undef %1740.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %270, 1536, 0, implicit $exec :: (load (s128), addrspace 3) + %1740.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %270, 3600, 0, implicit $exec :: (load (s128), addrspace 3) + early-clobber %565:vreg_256 = V_WMMA_F32_16X16X16_F16_threeaddr_w32 8, %1740, 8, %2010, 8, %822, 0, 0, implicit $exec, implicit $exec + early-clobber %568:vreg_256 = V_WMMA_F32_16X16X16_F16_threeaddr_w32 8, %1740, 8, %1958, 8, %822, 0, 0, implicit $exec, implicit $exec + undef %1728.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %270, 4128, 0, implicit $exec :: (load (s128), addrspace 3) + %1728.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %270, 6192, 0, implicit $exec :: (load (s128), addrspace 3) + undef %1986.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %273, 24720, 0, implicit $exec :: (load (s128), addrspace 3) + %1986.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %273, 28832, 0, implicit $exec :: (load (s128), addrspace 3) + undef %1712.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %273, 25744, 0, implicit $exec :: (load (s128), addrspace 3) + %1712.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %273, 29856, 0, implicit $exec :: (load (s128), addrspace 3) + undef %1938.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %273, 26768, 0, implicit $exec :: (load (s128), addrspace 3) + %1938.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %273, 30880, 0, implicit $exec :: (load (s128), addrspace 3) + undef %1854.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %273, 27792, 0, implicit $exec :: (load (s128), addrspace 3) + %1854.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %273, 31904, 0, implicit $exec :: (load (s128), 
addrspace 3) + undef %1886.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %270, 4640, 0, implicit $exec :: (load (s128), addrspace 3) + %1886.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %270, 6704, 0, implicit $exec :: (load (s128), addrspace 3) + early-clobber %531:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1886, 8, %1986, 8, %531, 0, 0, implicit $exec + early-clobber %534:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1886, 8, %1938, 8, %534, 0, 0, implicit $exec + undef %1832.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %270, 5152, 0, implicit $exec :: (load (s128), addrspace 3) + %1832.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %270, 7216, 0, implicit $exec :: (load (s128), addrspace 3) + early-clobber %548:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1832, 8, %1986, 8, %548, 0, 0, implicit $exec + early-clobber %551:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1832, 8, %1938, 8, %551, 0, 0, implicit $exec + undef %1804.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %270, 5664, 0, implicit $exec :: (load (s128), addrspace 3) + %1804.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %270, 7728, 0, implicit $exec :: (load (s128), addrspace 3) + early-clobber %565:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1804, 8, %1986, 8, %565, 0, 0, implicit $exec + early-clobber %568:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1804, 8, %1938, 8, %568, 0, 0, implicit $exec + undef %1894.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %270, 8256, 0, implicit $exec :: (load (s128), addrspace 3) + %1894.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %270, 10320, 0, implicit $exec :: (load (s128), addrspace 3) + undef %1978.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %273, -32592, 0, implicit $exec :: (load (s128), addrspace 3) + %1978.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %273, -28480, 0, implicit $exec :: (load (s128), addrspace 3) + undef %1776.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %273, -31568, 0, implicit $exec :: (load (s128), addrspace 3) + %1776.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %273, -27456, 0, implicit $exec :: (load (s128), addrspace 3) + undef %1930.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %273, -30544, 0, implicit $exec :: (load (s128), addrspace 3) + %1930.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %273, -26432, 0, implicit $exec :: (load (s128), addrspace 3) + undef %1748.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %273, -29520, 0, implicit $exec :: (load (s128), addrspace 3) + %1748.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %273, -25408, 0, implicit $exec :: (load (s128), addrspace 3) + undef %1878.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %270, 8768, 0, implicit $exec :: (load (s128), addrspace 3) + %1878.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %270, 10832, 0, implicit $exec :: (load (s128), addrspace 3) + early-clobber %531:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1878, 8, %1978, 8, %531, 0, 0, implicit $exec + early-clobber %534:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1878, 8, %1930, 8, %534, 0, 0, implicit $exec + undef %1720.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %270, 9280, 0, implicit $exec :: (load (s128), addrspace 3) + %1720.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %270, 11344, 0, implicit $exec :: (load (s128), addrspace 3) + early-clobber %548:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1720, 8, %1978, 8, %548, 0, 0, implicit $exec + early-clobber %551:vreg_256 = 
V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1720, 8, %1930, 8, %551, 0, 0, implicit $exec + undef %1862.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %270, 9792, 0, implicit $exec :: (load (s128), addrspace 3) + %1862.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %270, 11856, 0, implicit $exec :: (load (s128), addrspace 3) + early-clobber %565:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1862, 8, %1978, 8, %565, 0, 0, implicit $exec + early-clobber %568:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1862, 8, %1930, 8, %568, 0, 0, implicit $exec + undef %1870.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %270, 12384, 0, implicit $exec :: (load (s128), addrspace 3) + %1870.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %270, 14448, 0, implicit $exec :: (load (s128), addrspace 3) + undef %1970.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %273, -24368, 0, implicit $exec :: (load (s128), addrspace 3) + %1970.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %273, -20256, 0, implicit $exec :: (load (s128), addrspace 3) + undef %1840.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %273, -23344, 0, implicit $exec :: (load (s128), addrspace 3) + %1840.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %273, -19232, 0, implicit $exec :: (load (s128), addrspace 3) + undef %1918.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %273, -22320, 0, implicit $exec :: (load (s128), addrspace 3) + %1918.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %273, -18208, 0, implicit $exec :: (load (s128), addrspace 3) + undef %1812.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %273, -21296, 0, implicit $exec :: (load (s128), addrspace 3) + %1812.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %273, -17184, 0, implicit $exec :: (load (s128), addrspace 3) + undef %1784.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %270, 12896, 0, implicit $exec :: (load (s128), addrspace 3) + %1784.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %270, 14960, 0, implicit $exec :: (load (s128), addrspace 3) + early-clobber %531:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1784, 8, %1970, 8, %531, 0, 0, implicit $exec + early-clobber %534:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1784, 8, %1918, 8, %534, 0, 0, implicit $exec + undef %1998.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %270, 13408, 0, implicit $exec :: (load (s128), addrspace 3) + %1998.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %270, 15472, 0, implicit $exec :: (load (s128), addrspace 3) + early-clobber %548:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1998, 8, %1970, 8, %548, 0, 0, implicit $exec + early-clobber %551:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1998, 8, %1918, 8, %551, 0, 0, implicit $exec + undef %1756.sub0_sub1_sub2_sub3:vreg_256 = DS_READ_B128_gfx9 %270, 13920, 0, implicit $exec :: (load (s128), addrspace 3) + %1756.sub4_sub5_sub6_sub7:vreg_256 = DS_READ_B128_gfx9 %270, 15984, 0, implicit $exec :: (load (s128), addrspace 3) + early-clobber %565:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1756, 8, %1970, 8, %565, 0, 0, implicit $exec + early-clobber %568:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1756, 8, %1918, 8, %568, 0, 0, implicit $exec + early-clobber %730:vreg_256 = V_WMMA_F32_16X16X16_F16_threeaddr_w32 8, %1946, 8, %2010, 8, %822, 0, 0, implicit $exec, implicit $exec + early-clobber %733:vreg_256 = V_WMMA_F32_16X16X16_F16_threeaddr_w32 8, %1946, 8, %1958, 8, %822, 0, 0, implicit $exec, implicit $exec + early-clobber %736:vreg_256 = 
V_WMMA_F32_16X16X16_F16_threeaddr_w32 8, %1906, 8, %1824, 8, %822, 0, 0, implicit $exec, implicit $exec + early-clobber %739:vreg_256 = V_WMMA_F32_16X16X16_F16_threeaddr_w32 8, %1768, 8, %1824, 8, %822, 0, 0, implicit $exec, implicit $exec + early-clobber %742:vreg_256 = V_WMMA_F32_16X16X16_F16_threeaddr_w32 8, %1740, 8, %1824, 8, %822, 0, 0, implicit $exec, implicit $exec + early-clobber %730:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1728, 8, %1986, 8, %730, 0, 0, implicit $exec + early-clobber %733:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1728, 8, %1938, 8, %733, 0, 0, implicit $exec + early-clobber %736:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1886, 8, %1712, 8, %736, 0, 0, implicit $exec + early-clobber %739:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1832, 8, %1712, 8, %739, 0, 0, implicit $exec + early-clobber %742:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1804, 8, %1712, 8, %742, 0, 0, implicit $exec + early-clobber %730:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1894, 8, %1978, 8, %730, 0, 0, implicit $exec + early-clobber %733:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1894, 8, %1930, 8, %733, 0, 0, implicit $exec + early-clobber %736:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1878, 8, %1776, 8, %736, 0, 0, implicit $exec + early-clobber %739:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1720, 8, %1776, 8, %739, 0, 0, implicit $exec + early-clobber %742:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1862, 8, %1776, 8, %742, 0, 0, implicit $exec + early-clobber %730:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1870, 8, %1970, 8, %730, 0, 0, implicit $exec + early-clobber %733:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1870, 8, %1918, 8, %733, 0, 0, implicit $exec + early-clobber %736:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1784, 8, %1840, 8, %736, 0, 0, implicit $exec + early-clobber %739:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1998, 8, %1840, 8, %739, 0, 0, implicit $exec + early-clobber %742:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1756, 8, %1840, 8, %742, 0, 0, implicit $exec + early-clobber %790:vreg_256 = V_WMMA_F32_16X16X16_F16_threeaddr_w32 8, %1946, 8, %1796, 8, %822, 0, 0, implicit $exec, implicit $exec + early-clobber %793:vreg_256 = V_WMMA_F32_16X16X16_F16_threeaddr_w32 8, %1768, 8, %1796, 8, %822, 0, 0, implicit $exec, implicit $exec + early-clobber %796:vreg_256 = V_WMMA_F32_16X16X16_F16_threeaddr_w32 8, %1740, 8, %1796, 8, %822, 0, 0, implicit $exec, implicit $exec + early-clobber %790:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1728, 8, %1854, 8, %790, 0, 0, implicit $exec + early-clobber %793:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1832, 8, %1854, 8, %793, 0, 0, implicit $exec + early-clobber %796:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1804, 8, %1854, 8, %796, 0, 0, implicit $exec + early-clobber %790:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1894, 8, %1748, 8, %790, 0, 0, implicit $exec + early-clobber %793:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1720, 8, %1748, 8, %793, 0, 0, implicit $exec + early-clobber %796:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1862, 8, %1748, 8, %796, 0, 0, implicit $exec + early-clobber %790:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1870, 8, %1812, 8, %790, 0, 0, implicit $exec + early-clobber %793:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1998, 8, %1812, 8, %793, 0, 0, implicit $exec + early-clobber %812:vreg_256 = 
V_WMMA_F32_16X16X16_F16_threeaddr_w32 8, %1906, 8, %1796, 8, %822, 0, 0, implicit $exec, implicit $exec + early-clobber %812:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1886, 8, %1854, 8, %812, 0, 0, implicit $exec + early-clobber %812:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1878, 8, %1748, 8, %812, 0, 0, implicit $exec + early-clobber %812:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1784, 8, %1812, 8, %812, 0, 0, implicit $exec + early-clobber %822:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1946, 8, %1824, 8, %822, 0, 0, implicit $exec + early-clobber %822:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1728, 8, %1712, 8, %822, 0, 0, implicit $exec + early-clobber %822:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1894, 8, %1776, 8, %822, 0, 0, implicit $exec + early-clobber %822:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1870, 8, %1840, 8, %822, 0, 0, implicit $exec + early-clobber %796:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %1756, 8, %1812, 8, %796, 0, 0, implicit $exec + %834:vgpr_32 = V_LSHRREV_B32_e64 3, %0(s32), implicit $exec + %835:vgpr_32 = V_AND_B32_e64 8, %195, implicit $exec + %837:vgpr_32 = V_AND_OR_B32_e64 %834, 16, %835, implicit $exec + %839:vgpr_32 = V_AND_B32_e64 56, %219, implicit $exec + undef %2018.sub0:vreg_64 = V_OR_B32_e64 %189, %839, implicit $exec + %841:vgpr_32 = V_OR_B32_e64 %186, %834, implicit $exec + early-clobber %845:vreg_64, $sgpr_null = V_MAD_U64_U32_gfx11_e64 %841, %23, %2018, 0, implicit $exec + %850:vgpr_32 = V_LSHLREV_B32_e64 2, %227, implicit $exec + %851:vgpr_32 = V_LSHL_OR_B32_e64 %837, 8, %850, implicit $exec + DS_WRITE2ST64_B32_gfx9 %851, %730.sub0, %730.sub1, 0, 1, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %730.sub2, %730.sub3, 2, 3, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %730.sub4, %730.sub5, 4, 5, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %730.sub6, %730.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3) + %859:vgpr_32 = V_LSHLREV_B32_e64 2, %839, implicit $exec + %860:vgpr_32 = V_LSHL_OR_B32_e64 %834, 8, %859, implicit $exec + %861:vreg_128 = DS_READ_B128_gfx9 %860, 0, 0, implicit $exec :: (load (s128), addrspace 3) + %862:vreg_128 = DS_READ_B128_gfx9 %860, 16, 0, implicit $exec :: (load (s128), addrspace 3) + %871:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %861.sub0, 0, 0, implicit $mode, implicit $exec + %872:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %861.sub1, 0, 0, implicit $mode, implicit $exec + %873:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %861.sub2, 0, 0, implicit $mode, implicit $exec + %874:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %861.sub3, 0, 0, implicit $mode, implicit $exec + %875:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %862.sub0, 0, 0, implicit $mode, implicit $exec + %876:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %862.sub1, 0, 0, implicit $mode, implicit $exec + %877:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %862.sub2, 0, 0, implicit $mode, implicit $exec + %878:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %862.sub3, 0, 0, implicit $mode, implicit $exec + undef %1926.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %877, 0, %878, 0, 0, implicit $mode, implicit $exec + %1926.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %875, 0, %876, 0, 0, implicit $mode, implicit $exec + %1926.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %873, 0, %874, 0, 0, implicit $mode, implicit $exec + %1926.sub0:vreg_128 = V_PACK_B32_F16_e64 0, %871, 0, %872, 0, 0, implicit $mode, implicit $exec + %887.sub2:sgpr_128 = S_LSHL_B32 %24, 1, implicit-def 
dead $scc + %887.sub3:sgpr_128 = COPY %245.sub3 + %888:vgpr_32 = V_LSHLREV_B32_e64 1, %845.sub0, implicit $exec + BUFFER_STORE_DWORDX4_OFFEN_exact %1926, %888, %887, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + DS_WRITE2ST64_B32_gfx9 %851, %822.sub0, %822.sub1, 0, 1, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %822.sub2, %822.sub3, 2, 3, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %822.sub4, %822.sub5, 4, 5, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %822.sub6, %822.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3) + %898:vreg_128 = DS_READ_B128_gfx9 %860, 0, 0, implicit $exec :: (load (s128), addrspace 3) + %899:vreg_128 = DS_READ_B128_gfx9 %860, 16, 0, implicit $exec :: (load (s128), addrspace 3) + %908:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %898.sub0, 0, 0, implicit $mode, implicit $exec + %909:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %898.sub1, 0, 0, implicit $mode, implicit $exec + %910:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %898.sub2, 0, 0, implicit $mode, implicit $exec + %911:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %898.sub3, 0, 0, implicit $mode, implicit $exec + %912:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %899.sub0, 0, 0, implicit $mode, implicit $exec + %913:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %899.sub1, 0, 0, implicit $mode, implicit $exec + %914:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %899.sub2, 0, 0, implicit $mode, implicit $exec + %915:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %899.sub3, 0, 0, implicit $mode, implicit $exec + undef %1848.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %914, 0, %915, 0, 0, implicit $mode, implicit $exec + %1848.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %912, 0, %913, 0, 0, implicit $mode, implicit $exec + %1848.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %910, 0, %911, 0, 0, implicit $mode, implicit $exec + %1848.sub0:vreg_128 = V_PACK_B32_F16_e64 0, %908, 0, %909, 0, 0, implicit $mode, implicit $exec + BUFFER_STORE_DWORDX4_OFFEN_exact %1848, %888, %887, 0, 128, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + DS_WRITE2ST64_B32_gfx9 %851, %733.sub0, %733.sub1, 0, 1, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %733.sub2, %733.sub3, 2, 3, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %733.sub4, %733.sub5, 4, 5, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %733.sub6, %733.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3) + %930:vreg_128 = DS_READ_B128_gfx9 %860, 0, 0, implicit $exec :: (load (s128), addrspace 3) + %931:vreg_128 = DS_READ_B128_gfx9 %860, 16, 0, implicit $exec :: (load (s128), addrspace 3) + %940:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %930.sub0, 0, 0, implicit $mode, implicit $exec + %941:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %930.sub1, 0, 0, implicit $mode, implicit $exec + %942:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %930.sub2, 0, 0, implicit $mode, implicit $exec + %943:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %930.sub3, 0, 0, implicit $mode, implicit $exec + %944:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %931.sub0, 0, 0, implicit $mode, implicit $exec + %945:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %931.sub1, 0, 0, implicit $mode, implicit $exec + %946:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %931.sub2, 0, 0, implicit $mode, implicit $exec + %947:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %931.sub3, 0, 0, implicit $mode, implicit $exec + undef %2024.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %946, 0, %947, 0, 0, implicit $mode, implicit 
$exec + %2024.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %944, 0, %945, 0, 0, implicit $mode, implicit $exec + %2024.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %942, 0, %943, 0, 0, implicit $mode, implicit $exec + %2024.sub0:vreg_128 = V_PACK_B32_F16_e64 0, %940, 0, %941, 0, 0, implicit $mode, implicit $exec + BUFFER_STORE_DWORDX4_OFFEN_exact %2024, %888, %887, 0, 256, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + %955:vgpr_32 = V_ADD_U32_e64 192, %845.sub0, 0, implicit $exec + DS_WRITE2ST64_B32_gfx9 %851, %790.sub0, %790.sub1, 0, 1, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %790.sub2, %790.sub3, 2, 3, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %790.sub4, %790.sub5, 4, 5, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %790.sub6, %790.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3) + %964:vreg_128 = DS_READ_B128_gfx9 %860, 0, 0, implicit $exec :: (load (s128), addrspace 3) + %965:vreg_128 = DS_READ_B128_gfx9 %860, 16, 0, implicit $exec :: (load (s128), addrspace 3) + %974:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %964.sub0, 0, 0, implicit $mode, implicit $exec + %975:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %964.sub1, 0, 0, implicit $mode, implicit $exec + %976:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %964.sub2, 0, 0, implicit $mode, implicit $exec + %977:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %964.sub3, 0, 0, implicit $mode, implicit $exec + %978:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %965.sub0, 0, 0, implicit $mode, implicit $exec + %979:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %965.sub1, 0, 0, implicit $mode, implicit $exec + %980:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %965.sub2, 0, 0, implicit $mode, implicit $exec + %981:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %965.sub3, 0, 0, implicit $mode, implicit $exec + undef %1966.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %980, 0, %981, 0, 0, implicit $mode, implicit $exec + %1966.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %978, 0, %979, 0, 0, implicit $mode, implicit $exec + %1966.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %976, 0, %977, 0, 0, implicit $mode, implicit $exec + %1966.sub0:vreg_128 = V_PACK_B32_F16_e64 0, %974, 0, %975, 0, 0, implicit $mode, implicit $exec + %987:vgpr_32 = V_LSHLREV_B32_e64 1, %955, implicit $exec + BUFFER_STORE_DWORDX4_OFFEN_exact %1966, %987, %887, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + %990:sreg_32 = nsw S_LSHL_B32 %23, 5, implicit-def dead $scc + DS_WRITE2ST64_B32_gfx9 %851, %812.sub0, %812.sub1, 0, 1, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %812.sub2, %812.sub3, 2, 3, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %812.sub4, %812.sub5, 4, 5, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %812.sub6, %812.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3) + %999:vreg_128 = DS_READ_B128_gfx9 %860, 0, 0, implicit $exec :: (load (s128), addrspace 3) + %1000:vreg_128 = DS_READ_B128_gfx9 %860, 16, 0, implicit $exec :: (load (s128), addrspace 3) + %1009:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %999.sub0, 0, 0, implicit $mode, implicit $exec + %1010:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %999.sub1, 0, 0, implicit $mode, implicit $exec + %1011:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %999.sub2, 0, 0, implicit $mode, implicit $exec + %1012:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %999.sub3, 0, 0, implicit $mode, implicit $exec + %1013:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1000.sub0, 0, 0, implicit $mode, 
implicit $exec + %1014:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1000.sub1, 0, 0, implicit $mode, implicit $exec + %1015:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1000.sub2, 0, 0, implicit $mode, implicit $exec + %1016:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1000.sub3, 0, 0, implicit $mode, implicit $exec + undef %1820.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %1015, 0, %1016, 0, 0, implicit $mode, implicit $exec + %1820.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %1013, 0, %1014, 0, 0, implicit $mode, implicit $exec + %1820.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %1011, 0, %1012, 0, 0, implicit $mode, implicit $exec + %1820.sub0:vreg_128 = V_PACK_B32_F16_e64 0, %1009, 0, %1010, 0, 0, implicit $mode, implicit $exec + %1022:vgpr_32 = V_ADD_LSHL_U32_e64 %955, %990, 1, implicit $exec + BUFFER_STORE_DWORDX4_OFFEN_exact %1820, %1022, %887, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + DS_WRITE2ST64_B32_gfx9 %851, %534.sub0, %534.sub1, 0, 1, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %534.sub2, %534.sub3, 2, 3, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %534.sub4, %534.sub5, 4, 5, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %534.sub6, %534.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3) + %1032:vreg_128 = DS_READ_B128_gfx9 %860, 0, 0, implicit $exec :: (load (s128), addrspace 3) + %1033:vreg_128 = DS_READ_B128_gfx9 %860, 16, 0, implicit $exec :: (load (s128), addrspace 3) + %1042:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1032.sub0, 0, 0, implicit $mode, implicit $exec + %1043:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1032.sub1, 0, 0, implicit $mode, implicit $exec + %1044:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1032.sub2, 0, 0, implicit $mode, implicit $exec + %1045:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1032.sub3, 0, 0, implicit $mode, implicit $exec + %1046:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1033.sub0, 0, 0, implicit $mode, implicit $exec + %1047:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1033.sub1, 0, 0, implicit $mode, implicit $exec + %1048:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1033.sub2, 0, 0, implicit $mode, implicit $exec + %1049:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1033.sub3, 0, 0, implicit $mode, implicit $exec + undef %2032.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %1048, 0, %1049, 0, 0, implicit $mode, implicit $exec + %2032.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %1046, 0, %1047, 0, 0, implicit $mode, implicit $exec + %2032.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %1044, 0, %1045, 0, 0, implicit $mode, implicit $exec + %2032.sub0:vreg_128 = V_PACK_B32_F16_e64 0, %1042, 0, %1043, 0, 0, implicit $mode, implicit $exec + %1056:vgpr_32 = V_ADD_U32_e64 -128, %1022, 0, implicit $exec + BUFFER_STORE_DWORDX4_OFFEN_exact %2032, %1056, %887, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + DS_WRITE2ST64_B32_gfx9 %851, %736.sub0, %736.sub1, 0, 1, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %736.sub2, %736.sub3, 2, 3, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %736.sub4, %736.sub5, 4, 5, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %736.sub6, %736.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3) + %1066:vreg_128 = DS_READ_B128_gfx9 %860, 0, 0, implicit $exec :: (load (s128), addrspace 3) + %1067:vreg_128 = DS_READ_B128_gfx9 %860, 16, 0, implicit $exec :: (load (s128), addrspace 3) + %1076:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1066.sub0, 0, 0, implicit 
$mode, implicit $exec + %1077:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1066.sub1, 0, 0, implicit $mode, implicit $exec + %1078:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1066.sub2, 0, 0, implicit $mode, implicit $exec + %1079:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1066.sub3, 0, 0, implicit $mode, implicit $exec + %1080:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1067.sub0, 0, 0, implicit $mode, implicit $exec + %1081:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1067.sub1, 0, 0, implicit $mode, implicit $exec + %1082:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1067.sub2, 0, 0, implicit $mode, implicit $exec + %1083:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1067.sub3, 0, 0, implicit $mode, implicit $exec + undef %2006.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %1082, 0, %1083, 0, 0, implicit $mode, implicit $exec + %2006.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %1080, 0, %1081, 0, 0, implicit $mode, implicit $exec + %2006.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %1078, 0, %1079, 0, 0, implicit $mode, implicit $exec + %2006.sub0:vreg_128 = V_PACK_B32_F16_e64 0, %1076, 0, %1077, 0, 0, implicit $mode, implicit $exec + %1090:vgpr_32 = V_ADD_U32_e64 -256, %1022, 0, implicit $exec + BUFFER_STORE_DWORDX4_OFFEN_exact %2006, %1090, %887, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + %1092:vgpr_32 = V_ADD_U32_e64 %990, %845.sub0, 0, implicit $exec + DS_WRITE2ST64_B32_gfx9 %851, %531.sub0, %531.sub1, 0, 1, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %531.sub2, %531.sub3, 2, 3, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %531.sub4, %531.sub5, 4, 5, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %531.sub6, %531.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3) + %1101:vreg_128 = DS_READ_B128_gfx9 %860, 0, 0, implicit $exec :: (load (s128), addrspace 3) + %1102:vreg_128 = DS_READ_B128_gfx9 %860, 16, 0, implicit $exec :: (load (s128), addrspace 3) + %1111:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1101.sub0, 0, 0, implicit $mode, implicit $exec + %1112:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1101.sub1, 0, 0, implicit $mode, implicit $exec + %1113:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1101.sub2, 0, 0, implicit $mode, implicit $exec + %1114:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1101.sub3, 0, 0, implicit $mode, implicit $exec + %1115:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1102.sub0, 0, 0, implicit $mode, implicit $exec + %1116:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1102.sub1, 0, 0, implicit $mode, implicit $exec + %1117:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1102.sub2, 0, 0, implicit $mode, implicit $exec + %1118:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1102.sub3, 0, 0, implicit $mode, implicit $exec + undef %1914.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %1117, 0, %1118, 0, 0, implicit $mode, implicit $exec + %1914.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %1115, 0, %1116, 0, 0, implicit $mode, implicit $exec + %1914.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %1113, 0, %1114, 0, 0, implicit $mode, implicit $exec + %1914.sub0:vreg_128 = V_PACK_B32_F16_e64 0, %1111, 0, %1112, 0, 0, implicit $mode, implicit $exec + %1124:vgpr_32 = V_LSHLREV_B32_e64 1, %1092, implicit $exec + BUFFER_STORE_DWORDX4_OFFEN_exact %1914, %1124, %887, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + %1126:vgpr_32 = V_ADD_U32_e64 %990, %1092, 0, implicit $exec + DS_WRITE2ST64_B32_gfx9 %851, %548.sub0, %548.sub1, 0, 1, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %548.sub2, %548.sub3, 2, 3, 0, implicit $exec :: (store 
(s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %548.sub4, %548.sub5, 4, 5, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %548.sub6, %548.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3) + %1135:vreg_128 = DS_READ_B128_gfx9 %860, 0, 0, implicit $exec :: (load (s128), addrspace 3) + %1136:vreg_128 = DS_READ_B128_gfx9 %860, 16, 0, implicit $exec :: (load (s128), addrspace 3) + %1145:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1135.sub0, 0, 0, implicit $mode, implicit $exec + %1146:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1135.sub1, 0, 0, implicit $mode, implicit $exec + %1147:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1135.sub2, 0, 0, implicit $mode, implicit $exec + %1148:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1135.sub3, 0, 0, implicit $mode, implicit $exec + %1149:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1136.sub0, 0, 0, implicit $mode, implicit $exec + %1150:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1136.sub1, 0, 0, implicit $mode, implicit $exec + %1151:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1136.sub2, 0, 0, implicit $mode, implicit $exec + %1152:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1136.sub3, 0, 0, implicit $mode, implicit $exec + undef %1792.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %1151, 0, %1152, 0, 0, implicit $mode, implicit $exec + %1792.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %1149, 0, %1150, 0, 0, implicit $mode, implicit $exec + %1792.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %1147, 0, %1148, 0, 0, implicit $mode, implicit $exec + %1792.sub0:vreg_128 = V_PACK_B32_F16_e64 0, %1145, 0, %1146, 0, 0, implicit $mode, implicit $exec + %1158:vgpr_32 = V_LSHLREV_B32_e64 1, %1126, implicit $exec + BUFFER_STORE_DWORDX4_OFFEN_exact %1792, %1158, %887, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + DS_WRITE2ST64_B32_gfx9 %851, %739.sub0, %739.sub1, 0, 1, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %739.sub2, %739.sub3, 2, 3, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %739.sub4, %739.sub5, 4, 5, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE2ST64_B32_gfx9 %851, %739.sub6, %739.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3) + %1168:vreg_128 = DS_READ_B128_gfx9 %860, 0, 0, implicit $exec :: (load (s128), addrspace 3) + %1169:vreg_128 = DS_READ_B128_gfx9 %860, 16, 0, implicit $exec :: (load (s128), addrspace 3) + %1178:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1168.sub0, 0, 0, implicit $mode, implicit $exec + %1179:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1168.sub1, 0, 0, implicit $mode, implicit $exec + %1180:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1168.sub2, 0, 0, implicit $mode, implicit $exec + %1181:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1168.sub3, 0, 0, implicit $mode, implicit $exec + %1182:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1169.sub0, 0, 0, implicit $mode, implicit $exec + %1183:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1169.sub1, 0, 0, implicit $mode, implicit $exec + %1184:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1169.sub2, 0, 0, implicit $mode, implicit $exec + %1185:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1169.sub3, 0, 0, implicit $mode, implicit $exec + undef %2020.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %1184, 0, %1185, 0, 0, implicit $mode, implicit $exec + %2020.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %1182, 0, %1183, 0, 0, implicit $mode, implicit $exec + %2020.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %1180, 0, %1181, 0, 0, implicit $mode, implicit $exec + %2020.sub0:vreg_128 = V_PACK_B32_F16_e64 0, %1178, 0, %1179, 0, 0, implicit $mode, implicit $exec + 
+    BUFFER_STORE_DWORDX4_OFFEN_exact %2020, %1158, %887, 0, 128, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
+    DS_WRITE2ST64_B32_gfx9 %851, %551.sub0, %551.sub1, 0, 1, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %551.sub2, %551.sub3, 2, 3, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %551.sub4, %551.sub5, 4, 5, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %551.sub6, %551.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
+    %1200:vreg_128 = DS_READ_B128_gfx9 %860, 0, 0, implicit $exec :: (load (s128), addrspace 3)
+    %1201:vreg_128 = DS_READ_B128_gfx9 %860, 16, 0, implicit $exec :: (load (s128), addrspace 3)
+    %1210:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1200.sub0, 0, 0, implicit $mode, implicit $exec
+    %1211:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1200.sub1, 0, 0, implicit $mode, implicit $exec
+    %1212:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1200.sub2, 0, 0, implicit $mode, implicit $exec
+    %1213:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1200.sub3, 0, 0, implicit $mode, implicit $exec
+    %1214:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1201.sub0, 0, 0, implicit $mode, implicit $exec
+    %1215:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1201.sub1, 0, 0, implicit $mode, implicit $exec
+    %1216:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1201.sub2, 0, 0, implicit $mode, implicit $exec
+    %1217:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1201.sub3, 0, 0, implicit $mode, implicit $exec
+    undef %1954.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %1216, 0, %1217, 0, 0, implicit $mode, implicit $exec
+    %1954.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %1214, 0, %1215, 0, 0, implicit $mode, implicit $exec
+    %1954.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %1212, 0, %1213, 0, 0, implicit $mode, implicit $exec
+    %1954.sub0:vreg_128 = V_PACK_B32_F16_e64 0, %1210, 0, %1211, 0, 0, implicit $mode, implicit $exec
+    BUFFER_STORE_DWORDX4_OFFEN_exact %1954, %1158, %887, 0, 256, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
+    %1224:vgpr_32 = V_ADD_U32_e64 192, %1126, 0, implicit $exec
+    DS_WRITE2ST64_B32_gfx9 %851, %793.sub0, %793.sub1, 0, 1, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %793.sub2, %793.sub3, 2, 3, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %793.sub4, %793.sub5, 4, 5, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %793.sub6, %793.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
+    %1233:vreg_128 = DS_READ_B128_gfx9 %860, 0, 0, implicit $exec :: (load (s128), addrspace 3)
+    %1234:vreg_128 = DS_READ_B128_gfx9 %860, 16, 0, implicit $exec :: (load (s128), addrspace 3)
+    %1243:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1233.sub0, 0, 0, implicit $mode, implicit $exec
+    %1244:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1233.sub1, 0, 0, implicit $mode, implicit $exec
+    %1245:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1233.sub2, 0, 0, implicit $mode, implicit $exec
+    %1246:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1233.sub3, 0, 0, implicit $mode, implicit $exec
+    %1247:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1234.sub0, 0, 0, implicit $mode, implicit $exec
+    %1248:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1234.sub1, 0, 0, implicit $mode, implicit $exec
+    %1249:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1234.sub2, 0, 0, implicit $mode, implicit $exec
+    %1250:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1234.sub3, 0, 0, implicit $mode, implicit $exec
+    undef %1764.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %1249, 0, %1250, 0, 0, implicit $mode, implicit $exec
+    %1764.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %1247, 0, %1248, 0, 0, implicit $mode, implicit $exec
+    %1764.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %1245, 0, %1246, 0, 0, implicit $mode, implicit $exec
+    %1764.sub0:vreg_128 = V_PACK_B32_F16_e64 0, %1243, 0, %1244, 0, 0, implicit $mode, implicit $exec
+    %1256:vgpr_32 = V_LSHLREV_B32_e64 1, %1224, implicit $exec
+    BUFFER_STORE_DWORDX4_OFFEN_exact %1764, %1256, %887, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
+    DS_WRITE2ST64_B32_gfx9 %851, %796.sub0, %796.sub1, 0, 1, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %796.sub2, %796.sub3, 2, 3, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %796.sub4, %796.sub5, 4, 5, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %796.sub6, %796.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
+    %1266:vreg_128 = DS_READ_B128_gfx9 %860, 0, 0, implicit $exec :: (load (s128), addrspace 3)
+    %1267:vreg_128 = DS_READ_B128_gfx9 %860, 16, 0, implicit $exec :: (load (s128), addrspace 3)
+    %1276:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1266.sub0, 0, 0, implicit $mode, implicit $exec
+    %1277:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1266.sub1, 0, 0, implicit $mode, implicit $exec
+    %1278:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1266.sub2, 0, 0, implicit $mode, implicit $exec
+    %1279:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1266.sub3, 0, 0, implicit $mode, implicit $exec
+    %1280:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1267.sub0, 0, 0, implicit $mode, implicit $exec
+    %1281:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1267.sub1, 0, 0, implicit $mode, implicit $exec
+    %1282:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1267.sub2, 0, 0, implicit $mode, implicit $exec
+    %1283:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1267.sub3, 0, 0, implicit $mode, implicit $exec
+    undef %2028.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %1282, 0, %1283, 0, 0, implicit $mode, implicit $exec
+    %2028.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %1280, 0, %1281, 0, 0, implicit $mode, implicit $exec
+    %2028.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %1278, 0, %1279, 0, 0, implicit $mode, implicit $exec
+    %2028.sub0:vreg_128 = V_PACK_B32_F16_e64 0, %1276, 0, %1277, 0, 0, implicit $mode, implicit $exec
+    %1289:vgpr_32 = V_ADD_LSHL_U32_e64 %1224, %990, 1, implicit $exec
+    BUFFER_STORE_DWORDX4_OFFEN_exact %2028, %1289, %887, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
+    DS_WRITE2ST64_B32_gfx9 %851, %568.sub0, %568.sub1, 0, 1, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %568.sub2, %568.sub3, 2, 3, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %568.sub4, %568.sub5, 4, 5, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %568.sub6, %568.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
+    %1299:vreg_128 = DS_READ_B128_gfx9 %860, 0, 0, implicit $exec :: (load (s128), addrspace 3)
+    %1300:vreg_128 = DS_READ_B128_gfx9 %860, 16, 0, implicit $exec :: (load (s128), addrspace 3)
+    %1309:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1299.sub0, 0, 0, implicit $mode, implicit $exec
+    %1310:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1299.sub1, 0, 0, implicit $mode, implicit $exec
+    %1311:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1299.sub2, 0, 0, implicit $mode, implicit $exec
+    %1312:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1299.sub3, 0, 0, implicit $mode, implicit $exec
+    %1313:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1300.sub0, 0, 0, implicit $mode, implicit $exec
+    %1314:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1300.sub1, 0, 0, implicit $mode, implicit $exec
+    %1315:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1300.sub2, 0, 0, implicit $mode, implicit $exec
+    %1316:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1300.sub3, 0, 0, implicit $mode, implicit $exec
+    undef %1994.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %1315, 0, %1316, 0, 0, implicit $mode, implicit $exec
+    %1994.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %1313, 0, %1314, 0, 0, implicit $mode, implicit $exec
+    %1994.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %1311, 0, %1312, 0, 0, implicit $mode, implicit $exec
+    %1994.sub0:vreg_128 = V_PACK_B32_F16_e64 0, %1309, 0, %1310, 0, 0, implicit $mode, implicit $exec
+    %1322:vgpr_32 = V_ADD_U32_e64 -128, %1289, 0, implicit $exec
+    BUFFER_STORE_DWORDX4_OFFEN_exact %1994, %1322, %887, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
+    DS_WRITE2ST64_B32_gfx9 %851, %742.sub0, %742.sub1, 0, 1, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %742.sub2, %742.sub3, 2, 3, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %742.sub4, %742.sub5, 4, 5, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %742.sub6, %742.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
+    %1332:vreg_128 = DS_READ_B128_gfx9 %860, 0, 0, implicit $exec :: (load (s128), addrspace 3)
+    %1333:vreg_128 = DS_READ_B128_gfx9 %860, 16, 0, implicit $exec :: (load (s128), addrspace 3)
+    %1342:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1332.sub0, 0, 0, implicit $mode, implicit $exec
+    %1343:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1332.sub1, 0, 0, implicit $mode, implicit $exec
+    %1344:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1332.sub2, 0, 0, implicit $mode, implicit $exec
+    %1345:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1332.sub3, 0, 0, implicit $mode, implicit $exec
+    %1346:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1333.sub0, 0, 0, implicit $mode, implicit $exec
+    %1347:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1333.sub1, 0, 0, implicit $mode, implicit $exec
+    %1348:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1333.sub2, 0, 0, implicit $mode, implicit $exec
+    %1349:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1333.sub3, 0, 0, implicit $mode, implicit $exec
+    undef %1902.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %1348, 0, %1349, 0, 0, implicit $mode, implicit $exec
+    %1902.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %1346, 0, %1347, 0, 0, implicit $mode, implicit $exec
+    %1902.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %1344, 0, %1345, 0, 0, implicit $mode, implicit $exec
+    %1902.sub0:vreg_128 = V_PACK_B32_F16_e64 0, %1342, 0, %1343, 0, 0, implicit $mode, implicit $exec
+    %1355:vgpr_32 = V_ADD_U32_e64 -256, %1289, 0, implicit $exec
+    BUFFER_STORE_DWORDX4_OFFEN_exact %1902, %1355, %887, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
+    DS_WRITE2ST64_B32_gfx9 %851, %565.sub0, %565.sub1, 0, 1, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %565.sub2, %565.sub3, 2, 3, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %565.sub4, %565.sub5, 4, 5, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE2ST64_B32_gfx9 %851, %565.sub6, %565.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
+    %1365:vreg_128 = DS_READ_B128_gfx9 %860, 0, 0, implicit $exec :: (load (s128), addrspace 3)
+    %1366:vreg_128 = DS_READ_B128_gfx9 %860, 16, 0, implicit $exec :: (load (s128), addrspace 3)
+    %1375:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1365.sub0, 0, 0, implicit $mode, implicit $exec
+    %1376:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1365.sub1, 0, 0, implicit $mode, implicit $exec
+    %1377:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1365.sub2, 0, 0, implicit $mode, implicit $exec
+    %1378:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1365.sub3, 0, 0, implicit $mode, implicit $exec
+    %1379:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1366.sub0, 0, 0, implicit $mode, implicit $exec
+    %1380:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1366.sub1, 0, 0, implicit $mode, implicit $exec
+    %1381:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1366.sub2, 0, 0, implicit $mode, implicit $exec
+    %1382:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %1366.sub3, 0, 0, implicit $mode, implicit $exec
+    undef %1736.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %1381, 0, %1382, 0, 0, implicit $mode, implicit $exec
+    %1736.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %1379, 0, %1380, 0, 0, implicit $mode, implicit $exec
+    %1736.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %1377, 0, %1378, 0, 0, implicit $mode, implicit $exec
+    %1736.sub0:vreg_128 = V_PACK_B32_F16_e64 0, %1375, 0, %1376, 0, 0, implicit $mode, implicit $exec
+    %1388:vgpr_32 = V_ADD_LSHL_U32_e64 %1126, %990, 1, implicit $exec
+    BUFFER_STORE_DWORDX4_OFFEN_exact %1736, %1388, %887, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
+    S_ENDPGM 0
+
+...