diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -612,10 +612,12 @@ // Whether this is legal depends on the floating point mode for the function. auto &FMad = getActionDefinitionsBuilder(G_FMAD); - if (ST.hasMadF16()) + if (ST.hasMadF16() && ST.hasMadMacF32Insts()) FMad.customFor({S32, S16}); - else + else if (ST.hasMadMacF32Insts()) FMad.customFor({S32}); + else if (ST.hasMadF16()) + FMad.customFor({S16}); FMad.scalarize(0) .lower(); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir @@ -1,7 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX7 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX10 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX101 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1030 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX103 %s --- name: test_fmad_s32_flush @@ -26,12 +27,19 @@ ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX7: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]] ; GFX7: $vgpr0 = COPY [[FMAD]](s32) - ; GFX10-LABEL: name: test_fmad_s32_flush - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10: $vgpr0 = COPY [[FMAD]](s32) + ; GFX101-LABEL: name: test_fmad_s32_flush + ; GFX101: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX101: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX101: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX101: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]] + ; GFX101: $vgpr0 = COPY [[FMAD]](s32) + ; GFX103-LABEL: name: test_fmad_s32_flush + ; GFX103: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX103: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX103: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] + ; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] + ; GFX103: $vgpr0 = COPY [[FADD]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -62,12 +70,19 @@ ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX7: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]] ; GFX7: $vgpr0 = COPY [[FMAD]](s32) - ; GFX10-LABEL: name: test_fmad_s32_flags_flush - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10: $vgpr0 = COPY [[FMAD]](s32) + ; GFX101-LABEL: name: test_fmad_s32_flags_flush + ; GFX101: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX101: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX101: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX101: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]] + ; GFX101: $vgpr0 = COPY [[FMAD]](s32) + ; GFX103-LABEL: name: test_fmad_s32_flags_flush + ; GFX103: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX103: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX103: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]] + ; GFX103: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]] + ; GFX103: $vgpr0 = COPY [[FADD]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -108,17 +123,30 @@ ; GFX7: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]] ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32) ; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX10-LABEL: name: test_fmad_v2s32_flush - ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]] - ; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]] - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32) - ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX101-LABEL: name: test_fmad_v2s32_flush + ; GFX101: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX101: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX101: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX101: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX101: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX101: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]] + ; GFX101: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]] + ; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32) + ; GFX101: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX103-LABEL: name: test_fmad_v2s32_flush + ; GFX103: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX103: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX103: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX103: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX103: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] + ; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]] + ; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] + ; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]] + ; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) + ; GFX103: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 @@ -161,18 +189,33 @@ ; GFX7: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]] ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32) ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-LABEL: name: test_fmad_v3s32_flush - ; GFX10: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX10: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX10: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX10: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]] - ; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]] - ; GFX10: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]] - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32) - ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX101-LABEL: name: test_fmad_v3s32_flush + ; GFX101: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX101: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX101: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX101: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX101: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX101: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]] + ; GFX101: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]] + ; GFX101: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]] + ; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32) + ; GFX101: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX103-LABEL: name: test_fmad_v3s32_flush + ; GFX103: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX103: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX103: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX103: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX103: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] + ; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]] + ; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] + ; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]] + ; GFX103: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] + ; GFX103: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]] + ; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) + ; GFX103: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 @@ -217,19 +260,36 @@ ; GFX7: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]] ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - ; GFX10-LABEL: name: test_fmad_v4s32_flush - ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX10: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; GFX10: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]] - ; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]] - ; GFX10: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]] - ; GFX10: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]] - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32) - ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX101-LABEL: name: test_fmad_v4s32_flush + ; GFX101: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX101: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX101: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX101: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; GFX101: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; GFX101: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]] + ; GFX101: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]] + ; GFX101: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]] + ; GFX101: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]] + ; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32) + ; GFX101: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX103-LABEL: name: test_fmad_v4s32_flush + ; GFX103: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX103: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX103: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX103: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; GFX103: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] + ; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] + ; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] + ; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]] + ; GFX103: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] + ; GFX103: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]] + ; GFX103: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] + ; GFX103: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]] + ; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32) + ; GFX103: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 @@ -262,13 +322,20 @@ ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX7: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-LABEL: name: test_fmad_s32_denorm - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] - ; GFX10: $vgpr0 = COPY [[FADD]](s32) + ; GFX101-LABEL: name: test_fmad_s32_denorm + ; GFX101: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX101: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX101: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX101: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] + ; GFX101: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] + ; GFX101: $vgpr0 = COPY [[FADD]](s32) + ; GFX103-LABEL: name: test_fmad_s32_denorm + ; GFX103: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX103: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX103: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] + ; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] + ; GFX103: $vgpr0 = COPY [[FADD]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -301,13 +368,20 @@ ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]] ; GFX7: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]] ; GFX7: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-LABEL: name: test_fmad_s32_flags_denorm - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]] - ; GFX10: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]] - ; GFX10: $vgpr0 = COPY [[FADD]](s32) + ; GFX101-LABEL: name: test_fmad_s32_flags_denorm + ; GFX101: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX101: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX101: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX101: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]] + ; GFX101: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]] + ; GFX101: $vgpr0 = COPY [[FADD]](s32) + ; GFX103-LABEL: name: test_fmad_s32_flags_denorm + ; GFX103: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX103: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX103: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]] + ; GFX103: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]] + ; GFX103: $vgpr0 = COPY [[FADD]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -352,19 +426,32 @@ ; GFX7: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]] ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX10-LABEL: name: test_fmad_v2s32_denorm - ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] - ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]] - ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] - ; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]] - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX101-LABEL: name: test_fmad_v2s32_denorm + ; GFX101: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX101: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX101: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX101: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX101: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX101: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] + ; GFX101: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]] + ; GFX101: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] + ; GFX101: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]] + ; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) + ; GFX101: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX103-LABEL: name: test_fmad_v2s32_denorm + ; GFX103: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX103: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX103: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX103: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX103: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] + ; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]] + ; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] + ; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]] + ; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) + ; GFX103: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 @@ -413,21 +500,36 @@ ; GFX7: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]] ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-LABEL: name: test_fmad_v3s32_denorm - ; GFX10: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX10: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX10: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX10: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] - ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]] - ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] - ; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]] - ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] - ; GFX10: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]] - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) - ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX101-LABEL: name: test_fmad_v3s32_denorm + ; GFX101: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX101: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX101: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX101: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX101: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX101: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] + ; GFX101: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]] + ; GFX101: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] + ; GFX101: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]] + ; GFX101: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] + ; GFX101: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]] + ; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) + ; GFX101: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX103-LABEL: name: test_fmad_v3s32_denorm + ; GFX103: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX103: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX103: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX103: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX103: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] + ; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]] + ; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] + ; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]] + ; GFX103: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] + ; GFX103: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]] + ; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) + ; GFX103: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 @@ -480,23 +582,40 @@ ; GFX7: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]] ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - ; GFX10-LABEL: name: test_fmad_v4s32_denorm - ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX10: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; GFX10: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] - ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] - ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] - ; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]] - ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] - ; GFX10: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]] - ; GFX10: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] - ; GFX10: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]] - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32) - ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX101-LABEL: name: test_fmad_v4s32_denorm + ; GFX101: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX101: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX101: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX101: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; GFX101: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; GFX101: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] + ; GFX101: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] + ; GFX101: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] + ; GFX101: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]] + ; GFX101: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] + ; GFX101: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]] + ; GFX101: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] + ; GFX101: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]] + ; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32) + ; GFX101: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX103-LABEL: name: test_fmad_v4s32_denorm + ; GFX103: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX103: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX103: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX103: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; GFX103: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] + ; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] + ; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] + ; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]] + ; GFX103: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] + ; GFX103: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]] + ; GFX103: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] + ; GFX103: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]] + ; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32) + ; GFX103: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s -; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare. @@ -3147,7 +3147,7 @@ ; CGP-NEXT: v_rcp_f32_e32 v2, v1 ; CGP-NEXT: v_mul_f32_e32 v2, v0, v2 ; CGP-NEXT: v_trunc_f32_e32 v2, v2 -; CGP-NEXT: v_fma_f32 v0, -v2, v1, v0 +; CGP-NEXT: v_mad_f32 v0, -v2, v1, v0 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v1 ; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] @@ -3444,9 +3444,9 @@ ; CGP-NEXT: v_mul_f32_e32 v6, v2, v6 ; CGP-NEXT: v_trunc_f32_e32 v5, v5 ; CGP-NEXT: v_trunc_f32_e32 v6, v6 -; CGP-NEXT: v_fma_f32 v0, -v5, v3, v0 +; CGP-NEXT: v_mad_f32 v0, -v5, v3, v0 ; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_fma_f32 v2, -v6, v4, v2 +; CGP-NEXT: v_mad_f32 v2, -v6, v4, v2 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v3 ; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s -; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare. @@ -3116,7 +3116,7 @@ ; CGP-NEXT: v_rcp_f32_e32 v4, v3 ; CGP-NEXT: v_mul_f32_e32 v4, v2, v4 ; CGP-NEXT: v_trunc_f32_e32 v4, v4 -; CGP-NEXT: v_fma_f32 v2, -v4, v3, v2 +; CGP-NEXT: v_mad_f32 v2, -v4, v3, v2 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, v3 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] @@ -3411,9 +3411,9 @@ ; CGP-NEXT: v_mul_f32_e32 v10, v7, v10 ; CGP-NEXT: v_trunc_f32_e32 v9, v9 ; CGP-NEXT: v_trunc_f32_e32 v10, v10 -; CGP-NEXT: v_fma_f32 v5, -v9, v6, v5 +; CGP-NEXT: v_mad_f32 v5, -v9, v6, v5 ; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9 -; CGP-NEXT: v_fma_f32 v7, -v10, v8, v7 +; CGP-NEXT: v_mad_f32 v7, -v10, v8, v7 ; CGP-NEXT: v_cvt_u32_f32_e32 v10, v10 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v5|, v6 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5]