diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1372,61 +1372,48 @@ >; } + /********** ================================ **********/ /********** Floating point absolute/negative **********/ /********** ================================ **********/ -// Prevent expanding both fneg and fabs. -// TODO: Add IgnoredBySelectionDAG bit? -let AddedComplexity = 1 in { // Prefer SALU to VALU patterns for DAG - def : GCNPat < - (fneg (fabs (f32 SReg_32:$src))), + (UniformUnaryFrag<fneg> (fabs (f32 SReg_32:$src))), (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000))) // Set sign bit >; def : GCNPat < - (fabs (f32 SReg_32:$src)), + (UniformUnaryFrag<fabs> (f32 SReg_32:$src)), (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x7fffffff))) >; def : GCNPat < - (fneg (f32 SReg_32:$src)), + (UniformUnaryFrag<fneg> (f32 SReg_32:$src)), (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000))) >; def : GCNPat < - (fneg (f16 SReg_32:$src)), + (UniformUnaryFrag<fneg> (f16 SReg_32:$src)), (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000))) >; def : GCNPat < - (fneg (f16 VGPR_32:$src)), - (V_XOR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) ->; - -def : GCNPat < - (fabs (f16 SReg_32:$src)), + (UniformUnaryFrag<fabs> (f16 SReg_32:$src)), (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00007fff))) >; def : GCNPat < - (fneg (fabs (f16 SReg_32:$src))), + (UniformUnaryFrag<fneg> (fabs (f16 SReg_32:$src))), (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit >; def : GCNPat < - (fneg (fabs (f16 VGPR_32:$src))), - (V_OR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit ->; - -def : GCNPat < - (fneg (v2f16 SReg_32:$src)), + (UniformUnaryFrag<fneg> (v2f16 SReg_32:$src)), (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) >; def : GCNPat < - (fabs (v2f16 SReg_32:$src)), + (UniformUnaryFrag<fabs> (v2f16 SReg_32:$src)), (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 
0x7fff7fff))) >; @@ -1435,51 +1422,20 @@ // fabs is not reported as free because there is modifier for it in // VOP3P instructions, so it is turned into the bit op. def : GCNPat < - (fneg (v2f16 (bitconvert (and_oneuse (i32 SReg_32:$src), 0x7fff7fff)))), + (UniformUnaryFrag<fneg> (v2f16 (bitconvert (and_oneuse (i32 SReg_32:$src), 0x7fff7fff)))), (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit >; def : GCNPat < - (fneg (v2f16 (fabs SReg_32:$src))), + (UniformUnaryFrag<fneg> (v2f16 (fabs SReg_32:$src))), (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit >; -// FIXME: The implicit-def of scc from S_[X]OR/AND_B32 is mishandled - // def : GCNPat < -// (fneg (f64 SReg_64:$src)), -// (REG_SEQUENCE SReg_64, -// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), -// sub0, -// (S_XOR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), -// (i32 (S_MOV_B32 (i32 0x80000000)))), -// sub1) -// >; - -// def : GCNPat < -// (fneg (fabs (f64 SReg_64:$src))), -// (REG_SEQUENCE SReg_64, -// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), -// sub0, -// (S_OR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), -// (S_MOV_B32 (i32 0x80000000))), // Set sign bit. -// sub1) -// >; - -// FIXME: Use S_BITSET0_B32/B64? -// def : GCNPat < -// (fabs (f64 SReg_64:$src)), -// (REG_SEQUENCE SReg_64, -// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), -// sub0, -// (S_AND_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), -// (i32 (S_MOV_B32 (i32 0x7fffffff)))), -// sub1) -// >; // COPY_TO_REGCLASS is needed to avoid using SCC from S_XOR_B32 instead // of the real value. 
def : GCNPat < - (fneg (v2f32 SReg_64:$src)), + (UniformUnaryFrag<fneg> (v2f32 SReg_64:$src)), (v2f32 (REG_SEQUENCE SReg_64, (f32 (COPY_TO_REGCLASS (S_XOR_B32 (i32 (EXTRACT_SUBREG $src, sub0)), (i32 (S_MOV_B32 (i32 0x80000000)))), @@ -1489,36 +1445,103 @@ SReg_32)), sub1)) >; -} // End let AddedComplexity = 1 +def : GCNPat < + (UniformUnaryFrag<fabs> (v2f32 SReg_64:$src)), + (v2f32 (REG_SEQUENCE SReg_64, + (f32 (COPY_TO_REGCLASS (S_AND_B32 (i32 (EXTRACT_SUBREG $src, sub0)), + (i32 (S_MOV_B32 (i32 0x7fffffff)))), + SReg_32)), sub0, + (f32 (COPY_TO_REGCLASS (S_AND_B32 (i32 (EXTRACT_SUBREG $src, sub1)), + (i32 (S_MOV_B32 (i32 0x7fffffff)))), + SReg_32)), sub1)) +>; + +def : GCNPat < + (UniformUnaryFrag<fneg> (fabs (v2f32 SReg_64:$src))), + (v2f32 (REG_SEQUENCE SReg_64, + (f32 (COPY_TO_REGCLASS (S_OR_B32 (i32 (EXTRACT_SUBREG $src, sub0)), + (i32 (S_MOV_B32 (i32 0x80000000)))), + SReg_32)), sub0, + (f32 (COPY_TO_REGCLASS (S_OR_B32 (i32 (EXTRACT_SUBREG $src, sub1)), + (i32 (S_MOV_B32 (i32 0x80000000)))), + SReg_32)), sub1)) +>; + +// FIXME: Use S_BITSET0_B32/B64? +def : GCNPat < + (UniformUnaryFrag<fabs> (f64 SReg_64:$src)), + (REG_SEQUENCE SReg_64, + (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), + sub0, + (i32 (COPY_TO_REGCLASS (S_AND_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), + (S_MOV_B32 (i32 0x7fffffff))), SReg_32)), // Clear sign bit. + sub1) +>; + +def : GCNPat < + (UniformUnaryFrag<fneg> (f64 SReg_64:$src)), + (REG_SEQUENCE SReg_64, + (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), + sub0, + (i32 (COPY_TO_REGCLASS (S_XOR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), + (i32 (S_MOV_B32 (i32 0x80000000)))), SReg_32)), + sub1) +>; + +def : GCNPat < + (UniformUnaryFrag<fneg> (fabs (f64 SReg_64:$src))), + (REG_SEQUENCE SReg_64, + (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), + sub0, + (i32 (COPY_TO_REGCLASS (S_OR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), + (S_MOV_B32 (i32 0x80000000))), SReg_32)),// Set sign bit. 
+ sub1) +>; + + +def : GCNPat < + (fneg (fabs (f32 VGPR_32:$src))), + (V_OR_B32_e64 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src) // Set sign bit +>; def : GCNPat < (fabs (f32 VGPR_32:$src)), - (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fffffff)), VGPR_32:$src) + (V_AND_B32_e64 (S_MOV_B32 (i32 0x7fffffff)), VGPR_32:$src) >; def : GCNPat < (fneg (f32 VGPR_32:$src)), - (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src) + (V_XOR_B32_e64 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src) >; def : GCNPat < (fabs (f16 VGPR_32:$src)), - (V_AND_B32_e32 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src) + (V_AND_B32_e64 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src) +>; + +def : GCNPat < + (fneg (f16 VGPR_32:$src)), + (V_XOR_B32_e64 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) +>; + +def : GCNPat < + (fneg (fabs (f16 VGPR_32:$src))), + (V_OR_B32_e64 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit >; def : GCNPat < (fneg (v2f16 VGPR_32:$src)), - (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) + (V_XOR_B32_e64 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) >; def : GCNPat < (fabs (v2f16 VGPR_32:$src)), - (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fff7fff)), VGPR_32:$src) + (V_AND_B32_e64 (S_MOV_B32 (i32 0x7fff7fff)), VGPR_32:$src) >; def : GCNPat < (fneg (v2f16 (fabs VGPR_32:$src))), - (V_OR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) // Set sign bit + (V_OR_B32_e64 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) // Set sign bit >; def : GCNPat < @@ -1526,30 +1549,28 @@ (REG_SEQUENCE VReg_64, (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)), sub0, - (V_AND_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)), - (V_MOV_B32_e32 (i32 0x7fffffff))), // Set sign bit. 
+ (V_AND_B32_e64 (i32 (S_MOV_B32 (i32 0x7fffffff))), // Clear sign bit. + (i32 (EXTRACT_SUBREG VReg_64:$src, sub1))), sub1) >; -// TODO: Use SGPR for constant def : GCNPat < (fneg (f64 VReg_64:$src)), (REG_SEQUENCE VReg_64, (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)), sub0, - (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)), - (i32 (V_MOV_B32_e32 (i32 0x80000000)))), + (V_XOR_B32_e64 (i32 (S_MOV_B32 (i32 0x80000000))), // Flip sign bit. + (i32 (EXTRACT_SUBREG VReg_64:$src, sub1))), sub1) >; -// TODO: Use SGPR for constant def : GCNPat < (fneg (fabs (f64 VReg_64:$src))), (REG_SEQUENCE VReg_64, (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)), sub0, - (V_OR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)), - (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit. + (V_OR_B32_e64 (i32 (S_MOV_B32 (i32 0x80000000))), // Set sign bit. + (i32 (EXTRACT_SUBREG VReg_64:$src, sub1))), sub1) >; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=SI %s +# RUN: llc -march=amdgcn -mcpu=fiji 
-run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=VI %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s --- name: fabs_s32_ss @@ -19,6 +19,34 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GCN: $sgpr0 = COPY [[S_AND_B32_]] + ; SI-LABEL: name: fabs_s32_ss + ; SI: liveins: $sgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; VI-LABEL: name: fabs_s32_ss + ; VI: liveins: $sgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX9-LABEL: name: fabs_s32_ss + ; GFX9: liveins: $sgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX10-LABEL: name: fabs_s32_ss + ; GFX10: liveins: $sgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; 
GFX10-NEXT: $sgpr0 = COPY [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FABS %0 $sgpr0 = COPY %1 @@ -39,6 +67,34 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 ; GCN: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GCN: $vgpr0 = COPY [[V_AND_B32_e32_]] + ; SI-LABEL: name: fabs_s32_vv + ; SI: liveins: $vgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; VI-LABEL: name: fabs_s32_vv + ; VI: liveins: $vgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX9-LABEL: name: fabs_s32_vv + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX10-LABEL: name: fabs_s32_vv + ; GFX10: liveins: $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FABS %0 $vgpr0 = COPY %1 @@ -58,6 +114,30 @@ ; GCN: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GCN: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] ; GCN: $vgpr0 = COPY [[FABS]](s32) + ; SI-LABEL: name: 
fabs_s32_vs + ; SI: liveins: $sgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] + ; SI-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; VI-LABEL: name: fabs_s32_vs + ; VI: liveins: $sgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] + ; VI-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; GFX9-LABEL: name: fabs_s32_vs + ; GFX9: liveins: $sgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] + ; GFX9-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; GFX10-LABEL: name: fabs_s32_vs + ; GFX10: liveins: $sgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] + ; GFX10-NEXT: $vgpr0 = COPY [[FABS]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FABS %0 $vgpr0 = COPY %1 @@ -78,6 +158,34 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GCN: $sgpr0 = COPY [[S_AND_B32_]] + ; SI-LABEL: name: fabs_v2s16_ss + ; SI: liveins: $sgpr0_sgpr1 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 + ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; VI-LABEL: name: fabs_v2s16_ss + ; VI: liveins: $sgpr0_sgpr1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 + ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX9-LABEL: name: fabs_v2s16_ss + ; GFX9: liveins: $sgpr0_sgpr1 + ; GFX9-NEXT: {{ $}} + 
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX10-LABEL: name: fabs_v2s16_ss + ; GFX10: liveins: $sgpr0_sgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = COPY [[S_AND_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = G_FABS %0 $sgpr0 = COPY %1 @@ -98,6 +206,34 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GCN: $sgpr0 = COPY [[S_AND_B32_]] + ; SI-LABEL: name: fabs_s16_ss + ; SI: liveins: $sgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 + ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; VI-LABEL: name: fabs_s16_ss + ; VI: liveins: $sgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 + ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX9-LABEL: name: fabs_s16_ss + ; GFX9: liveins: $sgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX10-LABEL: name: fabs_s16_ss + ; GFX10: liveins: $sgpr0 + ; 
GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = COPY [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FABS %1 @@ -120,6 +256,34 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 ; GCN: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GCN: $vgpr0 = COPY [[V_AND_B32_e32_]] + ; SI-LABEL: name: fabs_s16_vv + ; SI: liveins: $vgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 + ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; VI-LABEL: name: fabs_s16_vv + ; VI: liveins: $vgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 + ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX9-LABEL: name: fabs_s16_vv + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 + ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX10-LABEL: name: fabs_s16_vv + ; GFX10: liveins: $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 + ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] %0:vgpr(s32) = COPY 
$vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FABS %1 @@ -144,6 +308,38 @@ ; GCN: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] ; GCN: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) ; GCN: $vgpr0 = COPY [[COPY1]](s32) + ; SI-LABEL: name: fabs_s16_vs + ; SI: liveins: $sgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) + ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; VI-LABEL: name: fabs_s16_vs + ; VI: liveins: $sgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) + ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX9-LABEL: name: fabs_s16_vs + ; GFX9: liveins: $sgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX10-LABEL: name: fabs_s16_vs + ; GFX10: liveins: $sgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FABS %1 @@ -166,6 +362,34 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 ; GCN: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GCN: $vgpr0 
= COPY [[V_AND_B32_e32_]] + ; SI-LABEL: name: fabs_v2s16_vv + ; SI: liveins: $vgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 + ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; VI-LABEL: name: fabs_v2s16_vv + ; VI: liveins: $vgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 + ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX9-LABEL: name: fabs_v2s16_vv + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 + ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX10-LABEL: name: fabs_v2s16_vv + ; GFX10: liveins: $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 + ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FABS %0 $vgpr0 = COPY %1 @@ -185,6 +409,30 @@ ; GCN: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; GCN: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] ; GCN: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; SI-LABEL: name: fabs_v2s16_vs + ; SI: liveins: $sgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] + ; SI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; VI-LABEL: name: fabs_v2s16_vs + 
; VI: liveins: $sgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] + ; VI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; GFX9-LABEL: name: fabs_v2s16_vs + ; GFX9: liveins: $sgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] + ; GFX9-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; GFX10-LABEL: name: fabs_v2s16_vs + ; GFX10: liveins: $sgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] + ; GFX10-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:vgpr(<2 x s16>) = G_FABS %0 $vgpr0 = COPY %1 @@ -208,6 +456,46 @@ ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; SI-LABEL: name: fabs_s64_ss + ; SI: liveins: $sgpr0_sgpr1 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; VI-LABEL: name: fabs_s64_ss + ; VI: liveins: $sgpr0_sgpr1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 
= S_MOV_B32 2147483647 + ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GFX9-LABEL: name: fabs_s64_ss + ; GFX9: liveins: $sgpr0_sgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GFX10-LABEL: name: fabs_s64_ss + ; GFX10: liveins: $sgpr0_sgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_FABS %0 S_ENDPGM 0, implicit %1 @@ -225,12 +513,52 @@ ; GCN-LABEL: name: fabs_s64_vv ; GCN: liveins: $vgpr0_vgpr1 ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; GCN: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 [[S_MOV_B32_]], [[COPY1]], implicit $exec ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; SI-LABEL: name: fabs_s64_vv + ; SI: liveins: $vgpr0_vgpr1 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec + ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 + ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; VI-LABEL: name: fabs_s64_vv + ; VI: liveins: $vgpr0_vgpr1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec + ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 + ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GFX9-LABEL: name: fabs_s64_vv + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec + ; GFX9-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 + ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GFX10-LABEL: name: fabs_s64_vv + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 + ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FABS %0 S_ENDPGM 0, implicit %1 @@ -250,6 +578,30 @@ ; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GCN: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] ; GCN: S_ENDPGM 0, implicit [[FABS]](s64) + ; SI-LABEL: name: fabs_s64_vs + ; SI: liveins: $sgpr0_sgpr1 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] + ; SI-NEXT: S_ENDPGM 0, implicit [[FABS]](s64) + ; VI-LABEL: name: fabs_s64_vs + ; VI: liveins: $sgpr0_sgpr1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] + ; VI-NEXT: S_ENDPGM 0, implicit [[FABS]](s64) + ; GFX9-LABEL: name: fabs_s64_vs + ; GFX9: liveins: $sgpr0_sgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] + ; GFX9-NEXT: S_ENDPGM 0, implicit [[FABS]](s64) + ; GFX10-LABEL: name: fabs_s64_vs + ; GFX10: liveins: $sgpr0_sgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = 
COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] + ; GFX10-NEXT: S_ENDPGM 0, implicit [[FABS]](s64) %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_FABS %0 S_ENDPGM 0, implicit %1 @@ -268,12 +620,52 @@ ; GCN-LABEL: name: fabs_s64_vv_no_src_constraint ; GCN: liveins: $vgpr0_vgpr1 ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1 - ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; SI-LABEL: name: fabs_s64_vv_no_src_constraint + ; SI: liveins: $vgpr0_vgpr1 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 + ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; VI-LABEL: name: fabs_s64_vv_no_src_constraint + ; VI: liveins: $vgpr0_vgpr1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 + ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GFX9-LABEL: name: fabs_s64_vv_no_src_constraint + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 + ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GFX10-LABEL: name: fabs_s64_vv_no_src_constraint + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 + ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s64) = IMPLICIT_DEF %1:vgpr(s64) = G_FABS %0:vgpr(s64) S_ENDPGM 0, implicit %1 @@ -297,6 +689,46 @@ ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; SI-LABEL: name: fabs_s64_ss_no_src_constraint + ; SI: liveins: $sgpr0_sgpr1 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; 
SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 + ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; VI-LABEL: name: fabs_s64_ss_no_src_constraint + ; VI: liveins: $sgpr0_sgpr1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GFX9-LABEL: name: fabs_s64_ss_no_src_constraint + ; GFX9: liveins: $sgpr0_sgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GFX10-LABEL: name: fabs_s64_ss_no_src_constraint + ; GFX10: liveins: $sgpr0_sgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 + ; GFX10-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = IMPLICIT_DEF %1:sgpr(s64) = G_FABS %0:sgpr(s64) S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s --- @@ -18,12 +18,16 @@ liveins: $vgpr0 ; GFX8-LABEL: name: fcanonicalize_f16_denorm ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit %2 ; GFX9-LABEL: name: fcanonicalize_f16_denorm ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, 
implicit $exec - ; GFX9: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-LABEL: name: fcanonicalize_f16_denorm + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit %2 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FCANONICALIZE %1 @@ -45,12 +49,16 @@ liveins: $vgpr0 ; GFX8-LABEL: name: fcanonicalize_f16_flush ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: %2:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 15360, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 15360, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit %2 ; GFX9-LABEL: name: fcanonicalize_f16_flush ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-LABEL: name: fcanonicalize_f16_flush + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit %2 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FCANONICALIZE %1 @@ -73,12 +81,16 @@ ; GFX8-LABEL: name: fcanonicalize_f32_denorm ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: %1:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: %1:vgpr_32 = nofpexcept 
V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit %1 ; GFX9-LABEL: name: fcanonicalize_f32_denorm ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-LABEL: name: fcanonicalize_f32_denorm + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit %1 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -100,12 +112,16 @@ ; GFX8-LABEL: name: fcanonicalize_f32_flush ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: %1:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: %1:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit %1 ; GFX9-LABEL: name: fcanonicalize_f32_flush ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-LABEL: name: fcanonicalize_f32_flush + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit %1 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ 
-127,12 +143,16 @@ ; GFX8-LABEL: name: fcanonicalize_v2f16_denorm ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX8: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit %1 ; GFX9-LABEL: name: fcanonicalize_v2f16_denorm ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-LABEL: name: fcanonicalize_v2f16_denorm + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit %1 %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -154,12 +174,16 @@ ; GFX8-LABEL: name: fcanonicalize_v2f16_flush ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: %1:vgpr_32 = nofpexcept V_PK_MUL_F16 0, 15360, 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX8: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: %1:vgpr_32 = nofpexcept V_PK_MUL_F16 0, 15360, 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit %1 ; GFX9-LABEL: name: fcanonicalize_v2f16_flush ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; 
GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-LABEL: name: fcanonicalize_v2f16_flush + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit %1 %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -181,12 +205,16 @@ ; GFX8-LABEL: name: fcanonicalize_f64_denorm ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit %1 ; GFX9-LABEL: name: fcanonicalize_f64_denorm ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-LABEL: name: fcanonicalize_f64_denorm + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit %1 %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -208,12 +236,16 @@ ; GFX8-LABEL: name: fcanonicalize_f64_flush ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: %1:vreg_64 = nofpexcept V_MUL_F64_e64 0, 4607182418800017408, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: %1:vreg_64 = nofpexcept V_MUL_F64_e64 0, 4607182418800017408, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, 
implicit %1 ; GFX9-LABEL: name: fcanonicalize_f64_flush ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-LABEL: name: fcanonicalize_f64_flush + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit %1 %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -234,12 +266,16 @@ liveins: $vgpr0 ; GFX8-LABEL: name: fcanonicalize_fabs_f32_denorm ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit %2 ; GFX9-LABEL: name: fcanonicalize_fabs_f32_denorm ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-LABEL: name: fcanonicalize_fabs_f32_denorm + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit %2 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FABS %0 %2:vgpr(s32) = G_FCANONICALIZE %1 @@ -262,12 +298,16 @@ liveins: $vgpr0 ; GFX8-LABEL: name: 
fcanonicalize_fabs_f32_flush ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit %2 ; GFX9-LABEL: name: fcanonicalize_fabs_f32_flush ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-LABEL: name: fcanonicalize_fabs_f32_flush + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit %2 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FABS %0 %2:vgpr(s32) = G_FCANONICALIZE %1 @@ -289,12 +329,16 @@ liveins: $vgpr0 ; GFX8-LABEL: name: fcanonicalize_fneg_f32_denorm ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit %2 ; GFX9-LABEL: name: fcanonicalize_fneg_f32_denorm ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-LABEL: name: 
fcanonicalize_fneg_f32_denorm + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit %2 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FCANONICALIZE %1 @@ -316,12 +360,16 @@ liveins: $vgpr0 ; GFX8-LABEL: name: fcanonicalize_fneg_f32_flush ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit %2 ; GFX9-LABEL: name: fcanonicalize_fneg_f32_flush ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-LABEL: name: fcanonicalize_fneg_f32_flush + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit %2 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FCANONICALIZE %1 @@ -343,16 +391,22 @@ liveins: $vgpr0 ; GFX8-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX8: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX8: %3:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e32_]], 0, 0, implicit $mode, implicit $exec - ; GFX8: S_ENDPGM 0, implicit %3 + ; GFX8-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX8-NEXT: %3:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit %3 ; GFX9-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e32_]], 2, [[V_XOR_B32_e32_]], 0, 0, implicit $mode, implicit $exec - ; GFX9: S_ENDPGM 0, implicit %3 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit %3 + ; GFX10-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit %3 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FABS %1 @@ -375,16 +429,22 @@ liveins: $vgpr0 ; GFX8-LABEL: name: fcanonicalize_fneg_fabs_f32_flush ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX8: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX8: %3:vgpr_32 = nofpexcept 
V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e32_]], 0, 0, implicit $mode, implicit $exec - ; GFX8: S_ENDPGM 0, implicit %3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX8-NEXT: %3:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit %3 ; GFX9-LABEL: name: fcanonicalize_fneg_fabs_f32_flush ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e32_]], 2, [[V_XOR_B32_e32_]], 0, 0, implicit $mode, implicit $exec - ; GFX9: S_ENDPGM 0, implicit %3 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit %3 + ; GFX10-LABEL: name: fcanonicalize_fneg_fabs_f32_flush + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit %3 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FABS %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=SI %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=VI %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s --- name: fneg_s32_ss @@ -19,6 +19,34 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GCN: $sgpr0 = COPY [[S_XOR_B32_]] + ; SI-LABEL: name: fneg_s32_ss + ; SI: liveins: $sgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; VI-LABEL: name: fneg_s32_ss + ; VI: liveins: $sgpr0 + ; 
VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; GFX9-LABEL: name: fneg_s32_ss + ; GFX9: liveins: $sgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; GFX10-LABEL: name: fneg_s32_ss + ; GFX10: liveins: $sgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FNEG %0 $sgpr0 = COPY %1 @@ -39,6 +67,34 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]] + ; SI-LABEL: name: fneg_s32_vv + ; SI: liveins: $vgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; VI-LABEL: name: fneg_s32_vv + ; VI: liveins: $vgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX9-LABEL: name: fneg_s32_vv 
+ ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX10-LABEL: name: fneg_s32_vv + ; GFX10: liveins: $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 $vgpr0 = COPY %1 @@ -58,6 +114,30 @@ ; GCN: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GCN: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] ; GCN: $vgpr0 = COPY [[FNEG]](s32) + ; SI-LABEL: name: fneg_s32_vs + ; SI: liveins: $sgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] + ; SI-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; VI-LABEL: name: fneg_s32_vs + ; VI: liveins: $sgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] + ; VI-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; GFX9-LABEL: name: fneg_s32_vs + ; GFX9: liveins: $sgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] + ; GFX9-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; GFX10-LABEL: name: fneg_s32_vs + ; GFX10: liveins: $sgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] + ; GFX10-NEXT: $vgpr0 = COPY [[FNEG]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FNEG %0 $vgpr0 = COPY %1 @@ -78,6 +158,34 @@ ; GCN: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GCN: $sgpr0 = COPY [[S_XOR_B32_]] + ; SI-LABEL: name: fneg_s16_ss + ; SI: liveins: $sgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; VI-LABEL: name: fneg_s16_ss + ; VI: liveins: $sgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; GFX9-LABEL: name: fneg_s16_ss + ; GFX9: liveins: $sgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; GFX10-LABEL: name: fneg_s16_ss + ; GFX10: liveins: $sgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FNEG %1 @@ -100,6 +208,34 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]] + ; SI-LABEL: name: fneg_s16_vv + ; SI: liveins: $vgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; VI-LABEL: name: fneg_s16_vv + ; VI: liveins: $vgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX9-LABEL: name: fneg_s16_vv + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX10-LABEL: name: fneg_s16_vv + ; GFX10: liveins: $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 @@ -124,6 +260,38 @@ ; GCN: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] ; GCN: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) ; GCN: $vgpr0 = COPY [[COPY1]](s32) + ; SI-LABEL: name: fneg_s16_vs + ; SI: liveins: $sgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) + ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; VI-LABEL: name: fneg_s16_vs + ; VI: liveins: $sgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY 
$sgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) + ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX9-LABEL: name: fneg_s16_vs + ; GFX9: liveins: $sgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX10-LABEL: name: fneg_s16_vs + ; GFX10: liveins: $sgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 @@ -146,6 +314,34 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GCN: $sgpr0 = COPY [[S_XOR_B32_]] + ; SI-LABEL: name: fneg_v2s16_ss + ; SI: liveins: $sgpr0_sgpr1 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; VI-LABEL: name: fneg_v2s16_ss + ; VI: liveins: $sgpr0_sgpr1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; 
GFX9-LABEL: name: fneg_v2s16_ss + ; GFX9: liveins: $sgpr0_sgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; GFX10-LABEL: name: fneg_v2s16_ss + ; GFX10: liveins: $sgpr0_sgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = G_FNEG %0 $sgpr0 = COPY %1 @@ -166,6 +362,34 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]] + ; SI-LABEL: name: fneg_v2s16_vv + ; SI: liveins: $vgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; VI-LABEL: name: fneg_v2s16_vv + ; VI: liveins: $vgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX9-LABEL: name: fneg_v2s16_vv + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 
[[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX10-LABEL: name: fneg_v2s16_vv + ; GFX10: liveins: $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FNEG %0 $vgpr0 = COPY %1 @@ -185,6 +409,30 @@ ; GCN: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; GCN: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] ; GCN: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; SI-LABEL: name: fneg_v2s16_vs + ; SI: liveins: $sgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] + ; SI-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; VI-LABEL: name: fneg_v2s16_vs + ; VI: liveins: $sgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] + ; VI-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; GFX9-LABEL: name: fneg_v2s16_vs + ; GFX9: liveins: $sgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] + ; GFX9-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; GFX10-LABEL: name: fneg_v2s16_vs + ; GFX10: liveins: $sgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] + ; GFX10-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:vgpr(<2 x s16>) = G_FNEG %0 $vgpr0 = COPY %1 @@ -208,6 +456,46 @@ ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = 
REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; SI-LABEL: name: fneg_s64_ss + ; SI: liveins: $sgpr0_sgpr1 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 + ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; VI-LABEL: name: fneg_s64_ss + ; VI: liveins: $sgpr0_sgpr1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 + ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GFX9-LABEL: name: fneg_s64_ss + ; GFX9: liveins: $sgpr0_sgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 + ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GFX10-LABEL: name: fneg_s64_ss + ; GFX10: liveins: $sgpr0_sgpr1 + ; GFX10-NEXT: 
{{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 + ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_FNEG %0 S_ENDPGM 0, implicit %1 @@ -225,12 +513,52 @@ ; GCN-LABEL: name: fneg_s64_vv ; GCN: liveins: $vgpr0_vgpr1 ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483648, implicit $exec ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY1]], implicit $exec ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e32_]], %subreg.sub1 ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; SI-LABEL: name: fneg_s64_vv + ; SI: liveins: $vgpr0_vgpr1 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec + ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 + ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; VI-LABEL: 
name: fneg_s64_vv + ; VI: liveins: $vgpr0_vgpr1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec + ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 + ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GFX9-LABEL: name: fneg_s64_vv + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 + ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GFX10-LABEL: name: fneg_s64_vv + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 + ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FNEG %0 S_ENDPGM 0, implicit %1 @@ -250,6 +578,30 @@ ; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY 
$sgpr0_sgpr1 ; GCN: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]] ; GCN: S_ENDPGM 0, implicit [[FNEG]](s64) + ; SI-LABEL: name: fneg_s64_vs + ; SI: liveins: $sgpr0_sgpr1 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]] + ; SI-NEXT: S_ENDPGM 0, implicit [[FNEG]](s64) + ; VI-LABEL: name: fneg_s64_vs + ; VI: liveins: $sgpr0_sgpr1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]] + ; VI-NEXT: S_ENDPGM 0, implicit [[FNEG]](s64) + ; GFX9-LABEL: name: fneg_s64_vs + ; GFX9: liveins: $sgpr0_sgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]] + ; GFX9-NEXT: S_ENDPGM 0, implicit [[FNEG]](s64) + ; GFX10-LABEL: name: fneg_s64_vs + ; GFX10: liveins: $sgpr0_sgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]] + ; GFX10-NEXT: S_ENDPGM 0, implicit [[FNEG]](s64) %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_FNEG %0 S_ENDPGM 0, implicit %1 @@ -271,6 +623,34 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GCN: S_ENDPGM 0, implicit [[S_OR_B32_]] + ; SI-LABEL: name: fneg_fabs_s32_ss + ; SI: liveins: $sgpr0_sgpr1 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] + ; VI-LABEL: name: fneg_fabs_s32_ss + ; VI: liveins: $sgpr0_sgpr1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; VI-NEXT: 
[[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] + ; GFX9-LABEL: name: fneg_fabs_s32_ss + ; GFX9: liveins: $sgpr0_sgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] + ; GFX10-LABEL: name: fneg_fabs_s32_ss + ; GFX10: liveins: $sgpr0_sgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FABS %0 %2:sgpr(s32) = G_FNEG %1 @@ -292,6 +672,34 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_XOR_B32_e32_]] + ; SI-LABEL: name: fneg_fabs_s32_vv + ; SI: liveins: $vgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] + ; VI-LABEL: name: fneg_fabs_s32_vv + ; VI: liveins: $vgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] + ; GFX9-LABEL: name: fneg_fabs_s32_vv + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] + ; GFX10-LABEL: name: fneg_fabs_s32_vv + ; GFX10: liveins: $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FABS %0 %2:vgpr(s32) = G_FNEG %0 @@ -314,6 +722,38 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648 ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e32 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_XOR_B32_e32_]](s32) + ; SI-LABEL: name: fneg_fabs_s32_vs + ; SI: liveins: $sgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648 + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec + ; SI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]](s32) + ; VI-LABEL: name: fneg_fabs_s32_vs + ; VI: liveins: $sgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]](s32) + ; GFX9-LABEL: name: fneg_fabs_s32_vs + ; GFX9: liveins: $sgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) 
= COPY $sgpr0 + ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]](s32) + ; GFX10-LABEL: name: fneg_fabs_s32_vs + ; GFX10: liveins: $sgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FABS %0 %2:vgpr(s32) = G_FNEG %1 @@ -335,6 +775,34 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GCN: $sgpr0 = COPY [[S_OR_B32_]] + ; SI-LABEL: name: fneg_fabs_s16_ss + ; SI: liveins: $sgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; VI-LABEL: name: fneg_fabs_s16_ss + ; VI: liveins: $sgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; GFX9-LABEL: name: fneg_fabs_s16_ss + ; GFX9: liveins: $sgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; GFX9-NEXT: 
[[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; GFX10-LABEL: name: fneg_fabs_s16_ss + ; GFX10: liveins: $sgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = COPY [[S_OR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FABS %1 @@ -359,6 +827,38 @@ ; GCN: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e32_]] ; GCN: $vgpr0 = COPY [[COPY1]] + ; SI-LABEL: name: fneg_fabs_s16_vv + ; SI: liveins: $vgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; SI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]] + ; SI-NEXT: $vgpr0 = COPY [[COPY1]] + ; VI-LABEL: name: fneg_fabs_s16_vv + ; VI: liveins: $vgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; VI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]] + ; VI-NEXT: $vgpr0 = COPY [[COPY1]] + ; GFX9-LABEL: name: fneg_fabs_s16_vv + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]] + ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]] + ; GFX10-LABEL: 
name: fneg_fabs_s16_vv + ; GFX10: liveins: $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; GFX10-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]] + ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FABS %1 @@ -385,6 +885,42 @@ ; GCN: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] ; GCN: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) ; GCN: $vgpr0 = COPY [[COPY1]](s32) + ; SI-LABEL: name: fneg_fabs_s16_vs + ; SI: liveins: $sgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]] + ; SI-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) + ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; VI-LABEL: name: fneg_fabs_s16_vs + ; VI: liveins: $sgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]] + ; VI-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) + ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX9-LABEL: name: fneg_fabs_s16_vs + ; GFX9: liveins: $sgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX10-LABEL: name: 
fneg_fabs_s16_vs + ; GFX10: liveins: $sgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]] + ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FNEG %1 @@ -408,6 +944,34 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GCN: $sgpr0 = COPY [[S_OR_B32_]] + ; SI-LABEL: name: fneg_fabs_v2s16_ss + ; SI: liveins: $sgpr0_sgpr1 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; VI-LABEL: name: fneg_fabs_v2s16_ss + ; VI: liveins: $sgpr0_sgpr1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; GFX9-LABEL: name: fneg_fabs_v2s16_ss + ; GFX9: liveins: $sgpr0_sgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; GFX10-LABEL: name: fneg_fabs_v2s16_ss + ; GFX10: liveins: $sgpr0_sgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + 
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = COPY [[S_OR_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = G_FABS %0 %2:sgpr(<2 x s16>) = G_FNEG %1 @@ -429,6 +993,34 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]] + ; SI-LABEL: name: fneg_fabs_v2s16_vv + ; SI: liveins: $vgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; VI-LABEL: name: fneg_fabs_v2s16_vv + ; VI: liveins: $vgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX9-LABEL: name: fneg_fabs_v2s16_vv + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX10-LABEL: name: fneg_fabs_v2s16_vv + ; GFX10: liveins: $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FABS %0 %2:vgpr(<2 x s16>) = G_FNEG 
%0 @@ -451,6 +1043,38 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e32 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec ; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]](<2 x s16>) + ; SI-LABEL: name: fneg_fabs_v2s16_vs + ; SI: liveins: $sgpr0 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec + ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) + ; VI-LABEL: name: fneg_fabs_v2s16_vs + ; VI: liveins: $sgpr0 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec + ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) + ; GFX9-LABEL: name: fneg_fabs_v2s16_vs + ; GFX9: liveins: $sgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec + ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) + ; GFX10-LABEL: name: fneg_fabs_v2s16_vs + ; GFX10: liveins: $sgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 + 
; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec + ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:vgpr(<2 x s16>) = G_FABS %0 %2:vgpr(<2 x s16>) = G_FNEG %1 @@ -475,6 +1099,46 @@ ; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; SI-LABEL: name: fneg_fabs_s64_ss + ; SI: liveins: $sgpr0_sgpr1 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 + ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; VI-LABEL: name: fneg_fabs_s64_ss + ; VI: liveins: $sgpr0_sgpr1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 + ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GFX9-LABEL: name: fneg_fabs_s64_ss + ; GFX9: liveins: $sgpr0_sgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX9-NEXT: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 + ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GFX10-LABEL: name: fneg_fabs_s64_ss + ; GFX10: liveins: $sgpr0_sgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 + ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_FABS %0 %2:sgpr(s64) = G_FNEG %1 @@ -493,12 +1157,52 @@ ; GCN-LABEL: name: fneg_fabs_s64_vv ; GCN: liveins: $vgpr0_vgpr1 ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483648, implicit $exec ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GCN: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GCN: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_MOV_B32_]], [[COPY1]], implicit $exec ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e32_]], %subreg.sub1 ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; SI-LABEL: name: fneg_fabs_s64_vv + ; SI: liveins: $vgpr0_vgpr1 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 
+ ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; SI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec + ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 + ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; VI-LABEL: name: fneg_fabs_s64_vv + ; VI: liveins: $vgpr0_vgpr1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; VI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec + ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 + ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GFX9-LABEL: name: fneg_fabs_s64_vv + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 + ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GFX10-LABEL: name: fneg_fabs_s64_vv + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX10-NEXT: 
[[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 + ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FABS %0 %2:vgpr(s64) = G_FNEG %1 @@ -518,12 +1222,56 @@ ; GCN: liveins: $sgpr0_sgpr1 ; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GCN: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]] - ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 2147483648, implicit $exec ; GCN: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub1(s64) - ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e32 [[COPY1]](s32), [[V_MOV_B32_e32_]](s32), implicit $exec + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 2147483648 + ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e32 [[S_MOV_B32_]](s32), [[COPY1]](s32), implicit $exec ; GCN: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64) ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e32_]](s16), %subreg.sub1 ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64) + ; SI-LABEL: name: fneg_fabs_s64_vs + ; SI: liveins: $sgpr0_sgpr1 + ; SI-NEXT: {{ $}} + ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; SI-NEXT: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub1(s64) + ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 2147483648 + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e64 [[S_MOV_B32_]](s32), [[COPY1]](s32), implicit $exec + ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64) + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e64_]](s16), %subreg.sub1 + ; SI-NEXT: S_ENDPGM 0, implicit 
[[REG_SEQUENCE]](s64) + ; VI-LABEL: name: fneg_fabs_s64_vs + ; VI: liveins: $sgpr0_sgpr1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; VI-NEXT: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub1(s64) + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 2147483648 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e64 [[S_MOV_B32_]](s32), [[COPY1]](s32), implicit $exec + ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64) + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e64_]](s16), %subreg.sub1 + ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64) + ; GFX9-LABEL: name: fneg_fabs_s64_vs + ; GFX9: liveins: $sgpr0_sgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub1(s64) + ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 2147483648 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e64 [[S_MOV_B32_]](s32), [[COPY1]](s32), implicit $exec + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64) + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e64_]](s16), %subreg.sub1 + ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64) + ; GFX10-LABEL: name: fneg_fabs_s64_vs + ; GFX10: liveins: $sgpr0_sgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]] + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub1(s64) + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 2147483648 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e64 [[S_MOV_B32_]](s32), [[COPY1]](s32), implicit 
$exec + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e64_]](s16), %subreg.sub1 + ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64) %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_FABS %0 %2:vgpr(s64) = G_FNEG %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir @@ -14,14 +14,16 @@ ; GCN-LABEL: name: fptosi_s32_to_s32_vv ; GCN: liveins: $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN: $vgpr0 = COPY %1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY %1 ; VI-LABEL: name: fptosi_s32_to_s32_vv ; VI: liveins: $vgpr0 - ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI: $vgpr0 = COPY %1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY %1 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FPTOSI %0 $vgpr0 = COPY %1 @@ -39,14 +41,16 @@ ; GCN-LABEL: name: fptosi_s32_to_s32_vs ; GCN: liveins: $sgpr0 - ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN: $vgpr0 = COPY %1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, 
[[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY %1 ; VI-LABEL: name: fptosi_s32_to_s32_vs ; VI: liveins: $sgpr0 - ; VI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI: $vgpr0 = COPY %1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY %1 %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FPTOSI %0 $vgpr0 = COPY %1 @@ -64,14 +68,16 @@ ; GCN-LABEL: name: fptosi_s32_to_s32_fneg_vv ; GCN: liveins: $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN: $vgpr0 = COPY %2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY %2 ; VI-LABEL: name: fptosi_s32_to_s32_fneg_vv ; VI: liveins: $vgpr0 - ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI: $vgpr0 = COPY %2 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY %2 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -90,16 +96,18 @@ ; GCN-LABEL: name: fptosi_s16_to_s32_vv ; GCN: liveins: $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec - ; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; GCN: $vgpr0 = COPY %2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; 
GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY %2 ; VI-LABEL: name: fptosi_s16_to_s32_vv ; VI: liveins: $vgpr0 - ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec - ; VI: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; VI: $vgpr0 = COPY %2 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY %2 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -118,16 +126,18 @@ ; GCN-LABEL: name: fptosi_s16_to_s32_vs ; GCN: liveins: $sgpr0 - ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec - ; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; GCN: $vgpr0 = COPY %2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY %2 ; VI-LABEL: name: fptosi_s16_to_s32_vs ; VI: liveins: $sgpr0 - ; VI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec - ; VI: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; VI: $vgpr0 = COPY %2 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec 
+ ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY %2 %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -146,20 +156,22 @@ ; GCN-LABEL: name: fptosi_s16_to_s32_fneg_vv ; GCN: liveins: $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec - ; GCN: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN: $vgpr0 = COPY %3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY %3 ; VI-LABEL: name: fptosi_s16_to_s32_fneg_vv ; VI: liveins: $vgpr0 - ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec - ; VI: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; VI: $vgpr0 = COPY %3 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, 
implicit $exec + ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY %3 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 @@ -179,16 +191,18 @@ ; GCN-LABEL: name: fptosi_s16_to_s1_vv ; GCN: liveins: $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec - ; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit %2 ; VI-LABEL: name: fptosi_s16_to_s1_vv ; VI: liveins: $vgpr0 - ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec - ; VI: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; VI: S_ENDPGM 0, implicit %2 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit %2 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -208,16 +222,18 @@ ; GCN-LABEL: name: fptosi_s16_to_s1_vs ; GCN: liveins: $sgpr0 - ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec - ; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: %4:vgpr_32 = 
nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit %2 ; VI-LABEL: name: fptosi_s16_to_s1_vs ; VI: liveins: $sgpr0 - ; VI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec - ; VI: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; VI: S_ENDPGM 0, implicit %2 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit %2 %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -237,20 +253,22 @@ ; GCN-LABEL: name: fptosi_s16_to_s1_fneg_vv ; GCN: liveins: $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec - ; GCN: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec - ; GCN: S_ENDPGM 0, implicit %3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit %3 ; VI-LABEL: name: fptosi_s16_to_s1_fneg_vv ; VI: liveins: $vgpr0 - ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - 
; VI: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec - ; VI: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec - ; VI: S_ENDPGM 0, implicit %3 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit %3 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir @@ -14,20 +14,20 @@ ; GCN-LABEL: name: fptoui ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GCN: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, 
implicit $mode, implicit $exec + ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; VI-LABEL: name: fptoui ; VI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; VI: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; VI: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; VI: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; VI-NEXT: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -56,16 +56,18 @@ ; GCN-LABEL: name: fptoui_s16_to_s32_vv ; GCN: liveins: $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec - ; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec - ; GCN: $vgpr0 = COPY %2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: 
%3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY %2 ; VI-LABEL: name: fptoui_s16_to_s32_vv ; VI: liveins: $vgpr0 - ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec - ; VI: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec - ; VI: $vgpr0 = COPY %2 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY %2 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOUI %1 @@ -84,16 +86,18 @@ ; GCN-LABEL: name: fptoui_s16_to_s32_vs ; GCN: liveins: $sgpr0 - ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec - ; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec - ; GCN: $vgpr0 = COPY %2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY %2 ; VI-LABEL: name: fptoui_s16_to_s32_vs ; VI: liveins: $sgpr0 - ; VI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec - ; VI: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec - ; VI: $vgpr0 = COPY %2 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; VI-NEXT: 
%2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY %2 %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOUI %1 @@ -112,20 +116,22 @@ ; GCN-LABEL: name: fptoui_s16_to_s32_fneg_vv ; GCN: liveins: $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec - ; GCN: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN: $vgpr0 = COPY %3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY %3 ; VI-LABEL: name: fptoui_s16_to_s32_fneg_vv ; VI: liveins: $vgpr0 - ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec - ; VI: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; VI: $vgpr0 = COPY %3 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; 
VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY %3 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 @@ -145,16 +151,18 @@ ; GCN-LABEL: name: fptoui_s16_to_s1_vv ; GCN: liveins: $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec - ; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit %2 ; VI-LABEL: name: fptoui_s16_to_s1_vv ; VI: liveins: $vgpr0 - ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec - ; VI: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; VI: S_ENDPGM 0, implicit %2 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit %2 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOUI %1 @@ -174,16 +182,18 @@ ; GCN-LABEL: name: fptoui_s16_to_s1_vs ; GCN: liveins: $sgpr0 - ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec - ; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 
[[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit %2 ; VI-LABEL: name: fptoui_s16_to_s1_vs ; VI: liveins: $sgpr0 - ; VI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec - ; VI: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; VI: S_ENDPGM 0, implicit %2 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit %2 %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOUI %1 @@ -203,20 +213,22 @@ ; GCN-LABEL: name: fptoui_s16_to_s1_fneg_vv ; GCN: liveins: $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec - ; GCN: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec - ; GCN: S_ENDPGM 0, implicit %3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit %3 ; VI-LABEL: name: fptoui_s16_to_s1_fneg_vv ; VI: liveins: $vgpr0 - ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec - ; VI: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec - ; VI: S_ENDPGM 0, implicit %3 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit %3 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir @@ -12,14 +12,14 @@ ; GCN-LABEL: name: select_s32_scc ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN: $scc = COPY [[COPY4]] - ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[COPY3]], implicit $scc - ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc + ; 
GCN-NEXT: $scc = COPY [[COPY4]] + ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = COPY $sgpr2 @@ -41,14 +41,14 @@ ; GCN-LABEL: name: select_s64_scc ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN: $scc = COPY [[COPY4]] - ; GCN: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc - ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc + ; GCN-NEXT: $scc = COPY [[COPY4]] + ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s64) = COPY $sgpr2_sgpr3 @@ -70,14 +70,14 @@ ; GCN-LABEL: name: select_p0_scc ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN: $scc = COPY [[COPY4]] - ; GCN: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc - ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GCN-NEXT: 
[[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc + ; GCN-NEXT: $scc = COPY [[COPY4]] + ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(p0) = COPY $sgpr2_sgpr3 @@ -99,14 +99,14 @@ ; GCN-LABEL: name: select_p1_scc ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN: $scc = COPY [[COPY4]] - ; GCN: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc - ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc + ; GCN-NEXT: $scc = COPY [[COPY4]] + ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(p1) = COPY $sgpr2_sgpr3 @@ -128,14 +128,14 @@ ; GCN-LABEL: name: select_p999_scc ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN: $scc = 
COPY [[COPY4]] - ; GCN: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc - ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc + ; GCN-NEXT: $scc = COPY [[COPY4]] + ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(p999) = COPY $sgpr2_sgpr3 @@ -157,14 +157,14 @@ ; GCN-LABEL: name: select_v4s16_scc ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN: $scc = COPY [[COPY4]] - ; GCN: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc - ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc + ; GCN-NEXT: $scc = COPY [[COPY4]] + ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 @@ -186,14 +186,14 @@ ; GCN-LABEL: name: select_s16_scc ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY 
$sgpr1 - ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN: S_CMP_EQ_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN: $scc = COPY [[COPY4]] - ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc - ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[COPY3]], implicit-def $scc + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc + ; GCN-NEXT: $scc = COPY [[COPY4]] + ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = COPY $sgpr2 @@ -217,14 +217,14 @@ ; GCN-LABEL: name: select_v2s16_scc ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN: $scc = COPY [[COPY4]] - ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[COPY3]], implicit $scc - ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc + ; GCN-NEXT: $scc = COPY [[COPY4]] + ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY 
$sgpr1 %2:sgpr(<2 x s16>) = COPY $sgpr2 @@ -246,12 +246,12 @@ ; GCN-LABEL: name: select_s32_vcc ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -273,12 +273,12 @@ ; GCN-LABEL: name: select_s16_vcc ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec - ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec + ; GCN-NEXT: 
[[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -302,12 +302,12 @@ ; GCN-LABEL: name: select_v2s16_vcc ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(<2 x s16>) = COPY $vgpr2 @@ -329,12 +329,12 @@ ; GCN-LABEL: name: select_p3_vcc ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(p3) = COPY $vgpr2 @@ -357,12 +357,12 @@ ; GCN-LABEL: name: select_s32_vcc_fneg_lhs ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 1, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 1, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -385,12 +385,12 @@ ; GCN-LABEL: name: select_s32_vcc_fneg_rhs ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN: 
[[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 1, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 1, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -413,12 +413,12 @@ ; GCN-LABEL: name: select_s32_vcc_fneg_fabs_lhs ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 3, [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 3, [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -443,14 +443,14 @@ ; GCN-LABEL: name: select_s16_vcc_fneg_lhs ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY 
$vgpr1 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec - ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[V_XOR_B32_e32_]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[V_XOR_B32_e64_]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -477,13 +477,13 @@ ; GCN-LABEL: name: select_v2s16_vcc_fneg_lhs ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY2]], implicit $exec - ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[V_XOR_B32_e32_]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] + ; GCN-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY2]], implicit $exec + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[V_XOR_B32_e64_]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(<2 x s16>) = COPY $vgpr2 @@ -508,16 +508,16 @@ ; GCN-LABEL: name: select_s32_scc_fneg_lhs ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN: $scc = COPY [[COPY4]] - ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[S_XOR_B32_]], [[COPY3]], implicit $scc - ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GCN-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc + ; GCN-NEXT: $scc = COPY [[COPY4]] + ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[S_XOR_B32_]], [[COPY3]], implicit $scc + ; GCN-NEXT: S_ENDPGM 0, 
implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = COPY $sgpr2 @@ -540,16 +540,16 @@ ; GCN-LABEL: name: select_s32_scc_fneg_rhs ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY3]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN: $scc = COPY [[COPY4]] - ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[S_XOR_B32_]], implicit $scc - ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GCN-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY3]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc + ; GCN-NEXT: $scc = COPY [[COPY4]] + ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[S_XOR_B32_]], implicit $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = COPY $sgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/fabs.f64.ll b/llvm/test/CodeGen/AMDGPU/fabs.f64.ll --- a/llvm/test/CodeGen/AMDGPU/fabs.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/fabs.f64.ll @@ -21,8 +21,7 @@ } ; FUNC-LABEL: {{^}}fabs_f64: -; SI: v_and_b32 -; SI-NOT: v_and_b32 +; SI: s_bitset0_b32 ; SI: s_endpgm define amdgpu_kernel void @fabs_f64(double addrspace(1)* %out, double %in) { %fabs = call double @llvm.fabs.f64(double %in) @@ -31,8 +30,8 @@ } ; FUNC-LABEL: 
{{^}}fabs_v2f64: -; SI: v_and_b32 -; SI: v_and_b32 +; SI: s_and_b32 +; SI: s_and_b32 ; SI: s_endpgm define amdgpu_kernel void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) { %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in) @@ -41,10 +40,10 @@ } ; FUNC-LABEL: {{^}}fabs_v4f64: -; SI: v_and_b32 -; SI: v_and_b32 -; SI: v_and_b32 -; SI: v_and_b32 +; SI: s_and_b32 +; SI: s_and_b32 +; SI: s_and_b32 +; SI: s_and_b32 ; SI: s_endpgm define amdgpu_kernel void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) { %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in) @@ -77,7 +76,7 @@ } ; FUNC-LABEL: {{^}}fabs_free_f64: -; SI: v_and_b32 +; SI: s_bitset0_b32 ; SI: s_endpgm define amdgpu_kernel void @fabs_free_f64(double addrspace(1)* %out, i64 %in) { %bc= bitcast i64 %in to double @@ -87,7 +86,7 @@ } ; FUNC-LABEL: {{^}}fabs_fn_free_f64: -; SI: v_and_b32 +; SI: s_bitset0_b32 ; SI: s_endpgm define amdgpu_kernel void @fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) { %bc= bitcast i64 %in to double diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs-divergence-driven-isel.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs-divergence-driven-isel.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs-divergence-driven-isel.ll @@ -0,0 +1,504 @@ +; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,FP16 %s + + +define amdgpu_kernel void @divergent_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) { +; GCN-LABEL: name: divergent_fneg_f32 +; GCN-LABEL: bb.0 (%ir-block.0) +; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 +; GCN: V_XOR_B32_e64 killed %[[REG]] + + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %tid.ext + %out.gep = getelementptr inbounds float, float 
addrspace(1)* %out, i64 %tid.ext + %val = load volatile float, float addrspace(1)* %in.gep + %fneg = fneg float %val + store float %fneg, float addrspace(1)* %out.gep + ret void +} + +define amdgpu_kernel void @uniform_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in, i64 %idx) { +; GCN-LABEL: name: uniform_fneg_f32 +; GCN-LABEL: bb.0 (%ir-block.0) +; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 +; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]] + + %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %idx + %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %idx + %val = load volatile float, float addrspace(1)* %in.gep + %fneg = fneg float %val + store float %fneg, float addrspace(1)* %out.gep + ret void +} + +define amdgpu_kernel void @divergent_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) { +; GCN-LABEL: name: divergent_fabs_f32 +; GCN-LABEL: bb.0 (%ir-block.0) +; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 +; GCN: V_AND_B32_e64 killed %[[REG]] + + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %tid.ext + %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext + %val = load volatile float, float addrspace(1)* %in.gep + %fabs = call float @llvm.fabs.f32(float %val) + store float %fabs, float addrspace(1)* %out.gep + ret void +} + +define amdgpu_kernel void @uniform_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in, i64 %idx) { +; GCN-LABEL: name: uniform_fabs_f32 +; GCN-LABEL: bb.0 (%ir-block.0) +; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 +; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]] + + %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %idx + %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %idx + %val = load volatile float, float addrspace(1)* %in.gep + %fabs = call float 
@llvm.fabs.f32(float %val) + store float %fabs, float addrspace(1)* %out.gep + ret void +} + +define amdgpu_kernel void @divergent_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) { +; GCN-LABEL: name: divergent_fneg_fabs_f32 +; GCN-LABEL: bb.0 (%ir-block.0) +; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 +; GCN: V_OR_B32_e64 killed %[[REG]] + + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %tid.ext + %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext + %val = load volatile float, float addrspace(1)* %in.gep + %fabs = call float @llvm.fabs.f32(float %val) + %fneg = fneg float %fabs + store float %fneg, float addrspace(1)* %out.gep + ret void +} + +define amdgpu_kernel void @uniform_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in, i64 %idx) { +; GCN-LABEL: name: uniform_fneg_fabs_f32 +; GCN-LABEL: bb.0 (%ir-block.0) +; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 +; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]] + + %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %idx + %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %idx + %val = load volatile float, float addrspace(1)* %in.gep + %fabs = call float @llvm.fabs.f32(float %val) + %fneg = fneg float %fabs + store float %fneg, float addrspace(1)* %out.gep + ret void +} + + +define amdgpu_kernel void @divergent_fabs_f16(half addrspace(1)* %in, half addrspace(1)* %out) { +; GCN-LABEL: name: divergent_fabs_f16 +; GCN-LABEL: bb.0 (%ir-block.0) +; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32767 +; FP16: V_AND_B32_e64 killed %[[REG]] + + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %in.gep = getelementptr inbounds half, half addrspace(1)* %in, i64 %tid.ext + %val = load volatile half, half addrspace(1)* %in.gep + %fabs = call half @llvm.fabs.f16(half %val) + store 
half %fabs, half addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @uniform_fabs_f16(half addrspace(1)* %in, half addrspace(1)* %out, i64 %idx) { +; GCN-LABEL: name: uniform_fabs_f16 +; GCN-LABEL: bb.0 (%ir-block.0) +; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32767 +; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]] + + %in.gep = getelementptr inbounds half, half addrspace(1)* %in, i64 %idx + %val = load volatile half, half addrspace(1)* %in.gep + %fabs = call half @llvm.fabs.f16(half %val) + store half %fabs, half addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @divergent_fneg_f16(half addrspace(1)* %in, half addrspace(1)* %out) { +; GCN-LABEL: name: divergent_fneg_f16 +; GCN-LABEL: bb.0 (%ir-block.0) +; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768 +; FP16: V_XOR_B32_e64 killed %[[REG]] + + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %in.gep = getelementptr inbounds half, half addrspace(1)* %in, i64 %tid.ext + %val = load volatile half, half addrspace(1)* %in.gep + %fneg = fneg half %val + store half %fneg, half addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @uniform_fneg_f16(half addrspace(1)* %in, half addrspace(1)* %out, i64 %idx) { +; GCN-LABEL: name: uniform_fneg_f16 +; GCN-LABEL: bb.0 (%ir-block.0) +; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768 +; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]] + + %in.gep = getelementptr inbounds half, half addrspace(1)* %in, i64 %idx + %val = load volatile half, half addrspace(1)* %in.gep + %fneg = fneg half %val + store half %fneg, half addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @divergent_fneg_fabs_f16(half addrspace(1)* %in, half addrspace(1)* %out) { +; GCN-LABEL: name: divergent_fneg_fabs_f16 +; GCN-LABEL: bb.0 (%ir-block.0) +; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768 +; FP16: V_OR_B32_e64 killed %[[REG]] + + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %in.gep = 
getelementptr inbounds half, half addrspace(1)* %in, i64 %tid.ext + %val = load volatile half, half addrspace(1)* %in.gep + %fabs = call half @llvm.fabs.f16(half %val) + %fneg = fneg half %fabs + store half %fneg, half addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @uniform_fneg_fabs_f16(half addrspace(1)* %in, half addrspace(1)* %out, i64 %idx) { +; GCN-LABEL: name: uniform_fneg_fabs_f16 +; GCN-LABEL: bb.0 (%ir-block.0) +; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768 +; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]] + + %in.gep = getelementptr inbounds half, half addrspace(1)* %in, i64 %idx + %val = load volatile half, half addrspace(1)* %in.gep + %fabs = call half @llvm.fabs.f16(half %val) + %fneg = fneg half %fabs + store half %fneg, half addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @divergent_fneg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) { +; GCN-LABEL: name: divergent_fneg_v2f16 +; GCN-LABEL: bb.0 (%ir-block.0) +; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880 +; FP16: V_XOR_B32_e64 killed %[[REG]] + + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid + %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid + %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2 + %fneg = fneg <2 x half> %val + store <2 x half> %fneg, <2 x half> addrspace(1)* %gep.out + ret void +} + +define amdgpu_kernel void @uniform_fneg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in, i32 %idx) { +; GCN-LABEL: name: uniform_fneg_v2f16 +; GCN-LABEL: bb.0 (%ir-block.0) +; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880 +; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]] + + %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %idx + %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %idx + %val = load <2 x half>, <2 x half> 
addrspace(1)* %gep.in, align 2
+  %fneg = fneg <2 x half> %val
+  store <2 x half> %fneg, <2 x half> addrspace(1)* %gep.out
+  ret void
+}
+
+define amdgpu_kernel void @divergent_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) { ; divergent (workitem-id index): VALU AND with packed mask expected
+; GCN-LABEL: name: divergent_fabs_v2f16
+; GCN-LABEL: bb.0 (%ir-block.0)
+; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
+; FP16: V_AND_B32_e64 killed %[[REG]]
+
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
+  %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
+  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
+  store <2 x half> %fabs, <2 x half> addrspace(1)* %gep.out
+  ret void
+}
+
+define amdgpu_kernel void @uniform_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in, i32 %idx) { ; uniform (kernel-argument index): SALU AND with packed mask expected
+; GCN-LABEL: name: uniform_fabs_v2f16
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
+; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]
+
+  %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %idx
+  %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %idx
+  %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
+  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
+  store <2 x half> %fabs, <2 x half> addrspace(1)* %gep.out
+  ret void
+}
+
+define amdgpu_kernel void @divergent_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) { ; divergent (workitem-id index): VALU OR setting both sign bits expected
+; GCN-LABEL: name: divergent_fneg_fabs_v2f16
+; GCN-LABEL: bb.0 (%ir-block.0)
+; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
+; FP16: V_OR_B32_e64 killed %[[REG]]
+
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
+  %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
+  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
+  %fneg = fneg <2 x half> %fabs
+  store <2 x half> %fneg, <2 x half> addrspace(1)* %gep.out
+  ret void
+}
+
+define amdgpu_kernel void @uniform_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in, i32 %idx) { ; uniform (kernel-argument index): SALU OR setting both sign bits expected
+; GCN-LABEL: name: uniform_fneg_fabs_v2f16
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
+; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]
+
+  %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %idx
+  %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %idx
+  %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
+  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
+  %fneg = fneg <2 x half> %fabs
+  store <2 x half> %fneg, <2 x half> addrspace(1)* %gep.out
+  ret void
+}
+
+define amdgpu_kernel void @divergent_fneg_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) { ; divergent (workitem-id index): one VALU XOR per element, shared mask
+; GCN-LABEL: name: divergent_fneg_v2f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: V_XOR_B32_e64 %[[REG]]
+; GCN: V_XOR_B32_e64 %[[REG]]
+
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %tid
+  %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
+  %fneg = fneg <2 x float> %val
+  store <2 x float> %fneg, <2 x float> addrspace(1)* %gep.out
+  ret void
+}
+
+define amdgpu_kernel void @uniform_fneg_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in, i32 %idx) { ; uniform (kernel-argument index): one SALU XOR per element, shared mask
+; GCN-LABEL: name: uniform_fneg_v2f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: S_XOR_B32 killed %{{[0-9]+}}, %[[REG]]
+; GCN: S_XOR_B32 killed %{{[0-9]+}}, %[[REG]]
+
+  %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %idx
+  %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %idx
+  %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
+  %fneg = fneg <2 x float> %val
+  store <2 x float> %fneg, <2 x float> addrspace(1)* %gep.out
+  ret void
+}
+
+define amdgpu_kernel void @divergent_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) { ; divergent (workitem-id index): one VALU AND per element, shared mask
+; GCN-LABEL: name: divergent_fabs_v2f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+; GCN: V_AND_B32_e64 %[[REG]]
+; GCN: V_AND_B32_e64 %[[REG]]
+
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %tid
+  %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
+  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
+  store <2 x float> %fabs, <2 x float> addrspace(1)* %gep.out
+  ret void
+}
+
+define amdgpu_kernel void @uniform_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in, i32 %idx) { ; uniform (kernel-argument index): one SALU AND per element, shared mask
+; GCN-LABEL: name: uniform_fabs_v2f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+; GCN: S_AND_B32 killed %{{[0-9]+}}, %[[REG]]
+; GCN: S_AND_B32 killed %{{[0-9]+}}, %[[REG]]
+
+  %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %idx
+  %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %idx
+  %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
+  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
+  store <2 x float> %fabs, <2 x float> addrspace(1)* %gep.out
+  ret void
+}
+
+define amdgpu_kernel void @divergent_fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) { ; divergent (workitem-id index): one VALU OR per element, shared mask
+; GCN-LABEL: name: divergent_fneg_fabs_v2f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: V_OR_B32_e64 %[[REG]]
+; GCN: V_OR_B32_e64 %[[REG]]
+
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %tid
+  %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
+  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
+  %fneg = fneg <2 x float> %fabs
+  store <2 x float> %fneg, <2 x float> addrspace(1)* %gep.out
+  ret void
+}
+
+define amdgpu_kernel void @uniform_fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in, i32 %idx) { ; uniform (kernel-argument index): one SALU OR per element, shared mask
+; GCN-LABEL: name: uniform_fneg_fabs_v2f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: S_OR_B32 killed %{{[0-9]+}}, %[[REG]]
+; GCN: S_OR_B32 killed %{{[0-9]+}}, %[[REG]]
+
+  %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %idx
+  %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %idx
+  %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
+  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
+  %fneg = fneg <2 x float> %fabs
+  store <2 x float> %fneg, <2 x float> addrspace(1)* %gep.out
+  ret void
+}
+
+define amdgpu_kernel void @divergent_fneg_f64(double addrspace(1)* %out, double addrspace(1)* %in) { ; divergent: VALU XOR on the high dword only, low dword copied through
+; GCN-LABEL: name: divergent_fneg_f64
+; GCN-LABEL: bb.0 (%ir-block.0)
+; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
+; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
+; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
+; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: %[[XOR:[0-9]+]]:vgpr_32 = V_XOR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
+; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
+; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR]], %subreg.sub1
+
+
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds double, double addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
+  %val = load volatile double, double addrspace(1)* %in.gep
+  %fneg = fneg double %val
+  store double %fneg, double addrspace(1)* %out.gep
+  ret void
+}
+
+define amdgpu_kernel void @uniform_fneg_f64(double addrspace(1)* %out, double addrspace(1)* %in, i64 %idx) { ; uniform: SALU XOR on the high dword only; result is copied before REG_SEQUENCE
+; GCN-LABEL: name: uniform_fneg_f64
+; GCN-LABEL: bb.0 (%ir-block.0)
+; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
+; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
+; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
+; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
+; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: %[[XOR:[0-9]+]]:sreg_32 = S_XOR_B32 killed %[[HI32]], killed %[[SREG_MASK]]
+; GCN: %[[XOR_COPY:[0-9]+]]:sreg_32 = COPY %[[XOR]]
+; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR_COPY]], %subreg.sub1
+
+  %in.gep = getelementptr inbounds double, double addrspace(1)* %in, i64 %idx
+  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %idx
+  %val = load volatile double, double addrspace(1)* %in.gep
+  %fneg = fneg double %val
+  store double %fneg, double addrspace(1)* %out.gep
+  ret void
+}
+
+define amdgpu_kernel void @divergent_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) { ; divergent: VALU AND on the high dword only, low dword copied through
+; GCN-LABEL: name: divergent_fabs_f64
+; GCN-LABEL: bb.0 (%ir-block.0)
+; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
+; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
+; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
+; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+; GCN: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
+; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
+; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND]], %subreg.sub1
+
+
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds double, double addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
+  %val = load volatile double, double addrspace(1)* %in.gep
+  %fabs = call double @llvm.fabs.f64(double %val)
+  store double %fabs, double addrspace(1)* %out.gep
+  ret void
+}
+
+define amdgpu_kernel void @uniform_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in, i64 %idx) { ; uniform: SALU AND on the high dword only; result is copied before REG_SEQUENCE
+; GCN-LABEL: name: uniform_fabs_f64
+; GCN-LABEL: bb.0 (%ir-block.0)
+; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
+; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
+; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
+; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
+; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %[[HI32]], killed %[[SREG_MASK]]
+; GCN: %[[AND_COPY:[0-9]+]]:sreg_32 = COPY %[[AND]]
+; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND_COPY]], %subreg.sub1
+
+
+  %in.gep = getelementptr inbounds double, double addrspace(1)* %in, i64 %idx
+  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %idx
+  %val = load volatile double, double addrspace(1)* %in.gep
+  %fabs = call double @llvm.fabs.f64(double %val)
+  store double %fabs, double addrspace(1)* %out.gep
+  ret void
+}
+
+define amdgpu_kernel void @divergent_fneg_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) { ; divergent: VALU OR on the high dword only, low dword copied through
+; GCN-LABEL: name: divergent_fneg_fabs_f64
+; GCN-LABEL: bb.0 (%ir-block.0)
+; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
+; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
+; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
+; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: %[[OR:[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
+; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
+; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR]], %subreg.sub1
+
+
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds double, double addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
+  %val = load volatile double, double addrspace(1)* %in.gep
+  %fabs = call double @llvm.fabs.f64(double %val)
+  %fneg = fneg double %fabs
+  store double %fneg, double addrspace(1)* %out.gep
+  ret void
+}
+
+define amdgpu_kernel void @uniform_fneg_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in, i64 %idx) { ; uniform: SALU OR on the high dword only; result is copied before REG_SEQUENCE
+; GCN-LABEL: name: uniform_fneg_fabs_f64
+; GCN-LABEL: bb.0 (%ir-block.0)
+; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
+; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
+; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
+; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
+; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: %[[OR:[0-9]+]]:sreg_32 = S_OR_B32 killed %[[HI32]], killed %[[SREG_MASK]]
+; GCN: %[[OR_COPY:[0-9]+]]:sreg_32 = COPY %[[OR]]
+; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR_COPY]], %subreg.sub1
+
+
+  %in.gep = getelementptr inbounds double, double addrspace(1)* %in, i64 %idx
+  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %idx
+  %val = load volatile double, double addrspace(1)* %in.gep
+  %fabs = call double @llvm.fabs.f64(double %val)
+  %fneg = fneg double %fabs
+  store double %fneg, double addrspace(1)* %out.gep
+  ret void
+}
+
+declare float @llvm.fabs.f32(float)
+declare half @llvm.fabs.f16(half)
+declare double @llvm.fabs.f64(double)
+declare <2 x half> @llvm.fabs.v2f16(<2
x half>)
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
+
+declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
@@ -44,8 +44,8 @@
 }
 
 ; GCN-LABEL: {{^}}fneg_fabs_fn_free_f64:
-; GCN: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}
-; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: s_bitset1_b32
+; VI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
 define amdgpu_kernel void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
   %bc = bitcast i64 %in to double
   %fabs = call double @fabs(double %bc)
@@ -55,11 +55,11 @@
 }
 
 ; GCN-LABEL: {{^}}fneg_fabs_f64:
-; GCN-DAG: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}
 ; SI-DAG: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x13
 ; VI-DAG: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x4c
-; GCN-DAG: v_or_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]]
+; GCN-DAG: s_bitset1_b32 s[[HI_X]], 31
 ; GCN-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
+; GCN-DAG: v_mov_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]]
 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO_V]]:[[HI_V]]{{\]}}
 define amdgpu_kernel void @fneg_fabs_f64(double addrspace(1)* %out, [8 x i32], double %in) {
   %fabs = call double @llvm.fabs.f64(double %in)
@@ -69,10 +69,10 @@
 }
 
 ; GCN-LABEL: {{^}}fneg_fabs_v2f64:
-; GCN: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}
+; GCN: s_brev_b32 [[IMMREG:s[0-9]+]], 1{{$}}
 ; GCN-NOT: 0x80000000
-; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
-; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
 define amdgpu_kernel void @fneg_fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
   %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
   %fsub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %fabs
@@ -81,12 +81,12 @@
 }
 
 ; GCN-LABEL: {{^}}fneg_fabs_v4f64:
-; GCN: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}
+; GCN: s_brev_b32 [[IMMREG:s[0-9]+]], 1{{$}}
 ; GCN-NOT: 0x80000000
-; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
-; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
-; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
-; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
 define amdgpu_kernel void @fneg_fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
   %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
   %fsub = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %fabs
diff --git a/llvm/test/CodeGen/AMDGPU/fneg.f64.ll b/llvm/test/CodeGen/AMDGPU/fneg.f64.ll
--- a/llvm/test/CodeGen/AMDGPU/fneg.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg.f64.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
 
 ; FUNC-LABEL: {{^}}fneg_f64:
-; GCN: v_xor_b32
+; GCN: s_xor_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
 define amdgpu_kernel void @fneg_f64(double addrspace(1)* %out, double %in) {
   %fneg = fsub double -0.000000e+00, %in
   store double %fneg, double addrspace(1)* %out
@@ -10,8 +10,8 @@
 }
 
 ; FUNC-LABEL: {{^}}fneg_v2f64:
-; GCN: v_xor_b32
-; GCN: v_xor_b32
+; GCN: s_xor_b32
+; GCN: s_xor_b32
 define amdgpu_kernel void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) {
   %fneg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %in
   store <2 x double> %fneg, <2 x double> addrspace(1)* %out
@@ -24,10 +24,10 @@
 ; R600: -PV
 ; R600: -PV
 
-; GCN: v_xor_b32
-; GCN: v_xor_b32
-; GCN: v_xor_b32
-; GCN: v_xor_b32
+; GCN: s_xor_b32
+; GCN: s_xor_b32
+; GCN: s_xor_b32
+; GCN: s_xor_b32
 define amdgpu_kernel void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) {
   %fneg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %in
   store <4 x double> %fneg, <4 x double> addrspace(1)* %out