diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir
@@ -1,11 +1,11 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
 # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s

 # GFX6/7 selection should fail.
 # RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX6 %s
-# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX7 %s

 ---
 name: atomicrmw_fadd_s32_local
@@ -18,24 +18,42 @@
 ; GFX8-LABEL: name: atomicrmw_fadd_s32_local
 ; GFX8: liveins: $vgpr0, $vgpr1
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: $m0 = S_MOV_B32 -1
- ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
- ; GFX8: $vgpr0 = COPY [[DS_ADD_RTN_F32_]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
+ ; GFX8-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]]
 ; GFX9-LABEL: name: atomicrmw_fadd_s32_local
 ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
- ; GFX9: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]]
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
+ ; GFX9-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]]
+ ; GFX10-LABEL: name: atomicrmw_fadd_s32_local
+ ; GFX10: liveins: $vgpr0, $vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX10-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
+ ; GFX10-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]]
 ; GFX6-LABEL: name: atomicrmw_fadd_s32_local
 ; GFX6: liveins: $vgpr0, $vgpr1
- ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX6: $m0 = S_MOV_B32 -1
- ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
- ; GFX6: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX6-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+ ; GFX6-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
+ ; GFX7-LABEL: name: atomicrmw_fadd_s32_local
+ ; GFX7: liveins: $vgpr0, $vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX7-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX7-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+ ; GFX7-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
 %0:vgpr(p3) = COPY $vgpr0
 %1:vgpr(s32) = COPY $vgpr1
 %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst (s32), addrspace 3)
@@ -54,21 +72,37 @@
 ; GFX8-LABEL: name: atomicrmw_fadd_s32_local_noret
 ; GFX8: liveins: $vgpr0, $vgpr1
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: $m0 = S_MOV_B32 -1
- ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
 ; GFX9-LABEL: name: atomicrmw_fadd_s32_local_noret
 ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
+ ; GFX10-LABEL: name: atomicrmw_fadd_s32_local_noret
+ ; GFX10: liveins: $vgpr0, $vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX10-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
 ; GFX6-LABEL: name: atomicrmw_fadd_s32_local_noret
 ; GFX6: liveins: $vgpr0, $vgpr1
- ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX6: $m0 = S_MOV_B32 -1
- ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX6-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+ ; GFX7-LABEL: name: atomicrmw_fadd_s32_local_noret
+ ; GFX7: liveins: $vgpr0, $vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX7-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX7-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
 %0:vgpr(p3) = COPY $vgpr0
 %1:vgpr(s32) = COPY $vgpr1
 %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst (s32), addrspace 3)
@@ -86,26 +120,46 @@
 ; GFX8-LABEL: name: atomicrmw_fadd_s32_local_gep4
 ; GFX8: liveins: $vgpr0, $vgpr1
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: $m0 = S_MOV_B32 -1
- ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
- ; GFX8: $vgpr0 = COPY [[DS_ADD_RTN_F32_]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
+ ; GFX8-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]]
 ; GFX9-LABEL: name: atomicrmw_fadd_s32_local_gep4
 ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
- ; GFX9: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]]
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
+ ; GFX9-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]]
+ ; GFX10-LABEL: name: atomicrmw_fadd_s32_local_gep4
+ ; GFX10: liveins: $vgpr0, $vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX10-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3)
+ ; GFX10-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]]
 ; GFX6-LABEL: name: atomicrmw_fadd_s32_local_gep4
 ; GFX6: liveins: $vgpr0, $vgpr1
- ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
- ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX6: $m0 = S_MOV_B32 -1
- ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
- ; GFX6: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX6-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
+ ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; GFX6-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+ ; GFX6-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
+ ; GFX7-LABEL: name: atomicrmw_fadd_s32_local_gep4
+ ; GFX7: liveins: $vgpr0, $vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX7-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
+ ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; GFX7-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX7-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+ ; GFX7-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
 %0:vgpr(p3) = COPY $vgpr0
 %1:vgpr(s32) = COPY $vgpr1
 %2:vgpr(s32) = G_CONSTANT i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir
@@ -1,11 +1,11 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
 # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s

 # GFX6/7 selection should fail.
 # RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX6 %s
-# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX7 %s

 ---
 name: atomicrmw_fadd_s32_region
@@ -18,24 +18,42 @@
 ; GFX8-LABEL: name: atomicrmw_fadd_s32_region
 ; GFX8: liveins: $vgpr0, $vgpr1
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: $m0 = S_MOV_B32 -1
- ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2)
- ; GFX8: $vgpr0 = COPY [[DS_ADD_RTN_F32_]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2)
+ ; GFX8-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]]
 ; GFX9-LABEL: name: atomicrmw_fadd_s32_region
 ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2)
- ; GFX9: $vgpr0 = COPY [[DS_ADD_RTN_F32_]]
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2)
+ ; GFX9-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]]
+ ; GFX10-LABEL: name: atomicrmw_fadd_s32_region
+ ; GFX10: liveins: $vgpr0, $vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX10-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2)
+ ; GFX10-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]]
 ; GFX6-LABEL: name: atomicrmw_fadd_s32_region
 ; GFX6: liveins: $vgpr0, $vgpr1
- ; GFX6: [[COPY:%[0-9]+]]:vgpr(p2) = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX6: $m0 = S_MOV_B32 -1
- ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2)
- ; GFX6: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p2) = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX6-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2)
+ ; GFX6-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
+ ; GFX7-LABEL: name: atomicrmw_fadd_s32_region
+ ; GFX7: liveins: $vgpr0, $vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p2) = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX7-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX7-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2)
+ ; GFX7-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
 %0:vgpr(p2) = COPY $vgpr0
 %1:vgpr(s32) = COPY $vgpr1
 %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p2), %1 :: (load store seq_cst (s32), addrspace 2)
@@ -54,21 +72,37 @@
 ; GFX8-LABEL: name: atomicrmw_fadd_s32_region_noret
 ; GFX8: liveins: $vgpr0, $vgpr1
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: $m0 = S_MOV_B32 -1
- ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2)
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2)
 ; GFX9-LABEL: name: atomicrmw_fadd_s32_region_noret
 ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2)
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2)
+ ; GFX10-LABEL: name: atomicrmw_fadd_s32_region_noret
+ ; GFX10: liveins: $vgpr0, $vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX10-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2)
 ; GFX6-LABEL: name: atomicrmw_fadd_s32_region_noret
 ; GFX6: liveins: $vgpr0, $vgpr1
- ; GFX6: [[COPY:%[0-9]+]]:vgpr(p2) = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX6: $m0 = S_MOV_B32 -1
- ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2)
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p2) = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX6-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2)
+ ; GFX7-LABEL: name: atomicrmw_fadd_s32_region_noret
+ ; GFX7: liveins: $vgpr0, $vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p2) = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX7-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX7-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2)
 %0:vgpr(p2) = COPY $vgpr0
 %1:vgpr(s32) = COPY $vgpr1
 %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p2), %1 :: (load store seq_cst (s32), addrspace 2)
@@ -86,26 +120,46 @@
 ; GFX8-LABEL: name: atomicrmw_fadd_s32_region_gep4
 ; GFX8: liveins: $vgpr0, $vgpr1
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: $m0 = S_MOV_B32 -1
- ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2)
- ; GFX8: $vgpr0 = COPY [[DS_ADD_RTN_F32_]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2)
+ ; GFX8-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]]
 ; GFX9-LABEL: name: atomicrmw_fadd_s32_region_gep4
 ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2)
- ; GFX9: $vgpr0 = COPY [[DS_ADD_RTN_F32_]]
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2)
+ ; GFX9-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]]
+ ; GFX10-LABEL: name: atomicrmw_fadd_s32_region_gep4
+ ; GFX10: liveins: $vgpr0, $vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX10-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2)
+ ; GFX10-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]]
 ; GFX6-LABEL: name: atomicrmw_fadd_s32_region_gep4
 ; GFX6: liveins: $vgpr0, $vgpr1
- ; GFX6: [[COPY:%[0-9]+]]:vgpr(p2) = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
- ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p2) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX6: $m0 = S_MOV_B32 -1
- ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2)
- ; GFX6: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p2) = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX6-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
+ ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p2) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; GFX6-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2)
+ ; GFX6-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
+ ; GFX7-LABEL: name: atomicrmw_fadd_s32_region_gep4
+ ; GFX7: liveins: $vgpr0, $vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p2) = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GFX7-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
+ ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p2) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; GFX7-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX7-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2)
+ ; GFX7-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
 %0:vgpr(p2) = COPY $vgpr0
 %1:vgpr(s32) = COPY $vgpr1
 %2:vgpr(s32) = G_CONSTANT i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll
@@ -27,25 +27,27 @@
 ; GFX9-NEXT: ; return to shader part epilog
 ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss
 ; GFX8-MIR: bb.1 (%ir-block.0):
- ; GFX8-MIR: liveins: $sgpr2, $sgpr3
- ; GFX8-MIR: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GFX8-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
- ; GFX8-MIR: $m0 = S_MOV_B32 -1
- ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
- ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
- ; GFX8-MIR: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-MIR-NEXT: liveins: $sgpr2, $sgpr3
+ ; GFX8-MIR-NEXT: {{ $}}
+ ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
+ ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
+ ; GFX8-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss
 ; GFX9-MIR: bb.1 (%ir-block.0):
- ; GFX9-MIR: liveins: $sgpr2, $sgpr3
- ; GFX9-MIR: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX9-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX9-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GFX9-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
- ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
- ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
- ; GFX9-MIR: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX9-MIR-NEXT: liveins: $sgpr2, $sgpr3
+ ; GFX9-MIR-NEXT: {{ $}}
+ ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
+ ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
+ ; GFX9-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
 ret float %ret
 }
@@ -69,25 +71,27 @@
 ; GFX9-NEXT: ; return to shader part epilog
 ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_offset
 ; GFX8-MIR: bb.1 (%ir-block.0):
- ; GFX8-MIR: liveins: $sgpr2, $sgpr3
- ; GFX8-MIR: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
- ; GFX8-MIR: $m0 = S_MOV_B32 -1
- ; GFX8-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
- ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
- ; GFX8-MIR: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-MIR-NEXT: liveins: $sgpr2, $sgpr3
+ ; GFX8-MIR-NEXT: {{ $}}
+ ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
+ ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
+ ; GFX8-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset
 ; GFX9-MIR: bb.1 (%ir-block.0):
- ; GFX9-MIR: liveins: $sgpr2, $sgpr3
- ; GFX9-MIR: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX9-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX9-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
- ; GFX9-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
- ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
- ; GFX9-MIR: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX9-MIR-NEXT: liveins: $sgpr2, $sgpr3
+ ; GFX9-MIR-NEXT: {{ $}}
+ ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
+ ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
+ ; GFX9-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
 %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
 ret float %ret
@@ -110,23 +114,25 @@
 ; GFX9-NEXT: s_endpgm
 ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_nortn
 ; GFX8-MIR: bb.1 (%ir-block.0):
- ; GFX8-MIR: liveins: $sgpr2, $sgpr3
- ; GFX8-MIR: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GFX8-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
- ; GFX8-MIR: $m0 = S_MOV_B32 -1
- ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
- ; GFX8-MIR: S_ENDPGM 0
+ ; GFX8-MIR-NEXT: liveins: $sgpr2, $sgpr3
+ ; GFX8-MIR-NEXT: {{ $}}
+ ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
+ ; GFX8-MIR-NEXT: S_ENDPGM 0
 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_nortn
 ; GFX9-MIR: bb.1 (%ir-block.0):
- ; GFX9-MIR: liveins: $sgpr2, $sgpr3
- ; GFX9-MIR: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX9-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX9-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GFX9-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
- ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
- ; GFX9-MIR: S_ENDPGM 0
+ ; GFX9-MIR-NEXT: liveins: $sgpr2, $sgpr3
+ ; GFX9-MIR-NEXT: {{ $}}
+ ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
+ ; GFX9-MIR-NEXT: S_ENDPGM 0
 %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
 ret void
 }
@@ -148,23 +154,25 @@
 ; GFX9-NEXT: s_endpgm
 ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn
 ; GFX8-MIR: bb.1 (%ir-block.0):
- ; GFX8-MIR: liveins: $sgpr2, $sgpr3
- ; GFX8-MIR: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
- ; GFX8-MIR: $m0 = S_MOV_B32 -1
- ; GFX8-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
- ; GFX8-MIR: S_ENDPGM 0
+ ; GFX8-MIR-NEXT: liveins: $sgpr2, $sgpr3
+ ; GFX8-MIR-NEXT: {{ $}}
+ ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
+ ; GFX8-MIR-NEXT: S_ENDPGM 0
 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn
 ; GFX9-MIR: bb.1 (%ir-block.0):
- ; GFX9-MIR: liveins: $sgpr2, $sgpr3
- ; GFX9-MIR: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX9-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX9-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
- ; GFX9-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
- ; GFX9-MIR: S_ENDPGM 0
+ ; GFX9-MIR-NEXT: liveins: $sgpr2, $sgpr3
+ ; GFX9-MIR-NEXT: {{ $}}
+ ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
+ ; GFX9-MIR-NEXT: S_ENDPGM 0
 %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
 %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
 ret void
@@ -187,25 +195,27 @@
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
 ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv
 ; GFX8-MIR: bb.1 (%ir-block.0):
- ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; GFX8-MIR: $m0 = S_MOV_B32 -1
- ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
- ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
- ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX8-MIR-NEXT: {{ $}}
+ ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
+ ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
+ ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+ ; GFX8-MIR-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv
 ; GFX9-MIR: bb.1 (%ir-block.0):
- ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
- ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
- ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX9-MIR-NEXT: {{ $}}
+ ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
+ ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
+ ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+ ; GFX9-MIR-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
 ret float %ret
 }
@@ -227,25 +237,27 @@
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
 ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset
 ; GFX8-MIR: bb.1 (%ir-block.0):
- ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; GFX8-MIR: $m0 = S_MOV_B32 -1
- ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
- ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
- ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX8-MIR-NEXT: {{ $}}
+ ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
+ ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
+ ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+ ; GFX8-MIR-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset
 ; GFX9-MIR: bb.1 (%ir-block.0):
- ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
- ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
- ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX9-MIR-NEXT: {{ $}}
+ ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
+ ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
+ ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+ ; GFX9-MIR-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
 %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
 ret float %ret
@@ -268,23 +280,25 @@
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
 ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_nortn
 ; GFX8-MIR: bb.1 (%ir-block.0):
- ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; GFX8-MIR: $m0 = S_MOV_B32 -1
- ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
- ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX8-MIR: S_SETPC_B64_return [[COPY3]]
+ ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX8-MIR-NEXT: {{ $}}
+ ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
+ ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+ ; GFX8-MIR-NEXT: S_SETPC_B64_return [[COPY3]]
 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_nortn
 ; GFX9-MIR: bb.1 (%ir-block.0):
- ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
- ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX9-MIR: S_SETPC_B64_return [[COPY3]]
+ ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX9-MIR-NEXT: {{ $}}
+ ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
+ ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+ ; GFX9-MIR-NEXT: S_SETPC_B64_return [[COPY3]]
 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
 ret void
 }
@@ -306,23 +320,25 @@
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
 ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn
 ; GFX8-MIR: bb.1 (%ir-block.0):
- ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; GFX8-MIR: $m0 = S_MOV_B32 -1
- ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
- ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX8-MIR: S_SETPC_B64_return [[COPY3]]
+ ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX8-MIR-NEXT: {{ $}}
+ ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
+ ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+ ; GFX8-MIR-NEXT: S_SETPC_B64_return [[COPY3]]
 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn
 ; GFX9-MIR: bb.1 (%ir-block.0):
- ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
- ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX9-MIR: S_SETPC_B64_return [[COPY3]]
+ ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX9-MIR-NEXT: {{ $}}
+ ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
+ ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+ ; GFX9-MIR-NEXT: S_SETPC_B64_return [[COPY3]]
 %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
 ret void
@@ -345,25 +361,27 @@
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
 ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_volatile
 ; GFX8-MIR: bb.1 (%ir-block.0):
- ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; GFX8-MIR: $m0 = S_MOV_B32 -1
- ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
- ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
- ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX8-MIR-NEXT: {{ $}}
+ ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
+ ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
+ ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+ ; GFX8-MIR-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_volatile
 ; GFX9-MIR: bb.1 (%ir-block.0):
- ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
- ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
- ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX9-MIR-NEXT: {{ $}}
+ ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
+ ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
+ ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+ ; GFX9-MIR-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true)
 ret float %ret
 }
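
For context only, and not part of the patch above: the .mir check lines cover instruction selection for G_ATOMICRMW_FADD on a local (addrspace 3) pointer, which in the patch is selected to DS_ADD_RTN_F32 on GFX8 and DS_ADD_RTN_F32_gfx9 on GFX9/GFX10. A minimal, hypothetical LLVM IR function of the kind such MIR tests are typically reduced from (the function name is illustrative, not from the patch) would look like:

; Illustrative sketch: seq_cst atomicrmw fadd on an LDS (addrspace 3) pointer.
define float @atomicrmw_fadd_local(float addrspace(3)* %ptr, float %val) {
  %ret = atomicrmw fadd float addrspace(3)* %ptr, float %val seq_cst
  ret float %ret
}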