diff --git a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll @@ -0,0 +1,742 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck %s + +define i1 @test1(i32 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: test1 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_LT_I32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp slt i32 %arg1, 1000 + %cmp2 = icmp slt i32 %arg2, 1000 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test2(i32 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: test2 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LT_U32_e64_]], killed [[V_CMP_LT_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp ult i32 %arg1, 1000 + %cmp2 = icmp ult i32 %arg2, 1000 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test3(i32 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: test3 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1001 + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_LT_I32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp sle i32 %arg1, 1000 + %cmp2 = icmp sle i32 %arg2, 1000 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test4(i32 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: test4 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1001 + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LT_U32_e64_]], killed [[V_CMP_LT_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp ule i32 %arg1, 1000 + %cmp2 = icmp ule i32 %arg2, 1000 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test5(i32 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: test5 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_GT_I32_e64_]], killed [[V_CMP_GT_I32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp sgt i32 %arg1, 1000 + %cmp2 = icmp sgt i32 %arg2, 1000 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test6(i32 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: test6 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_GT_U32_e64_]], killed [[V_CMP_GT_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp ugt i32 %arg1, 1000 + %cmp2 = icmp ugt i32 %arg2, 1000 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test7(i32 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: test7 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999 + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_GT_I32_e64_]], killed [[V_CMP_GT_I32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp sge i32 %arg1, 1000 + %cmp2 = icmp sge i32 %arg2, 1000 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test8(i32 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: test8 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999 + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_GT_U32_e64_]], killed [[V_CMP_GT_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp uge i32 %arg1, 1000 + %cmp2 = icmp uge i32 %arg2, 1000 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test9(i32 %arg1, i32 %arg2, i32 %arg3) #0 { + ; CHECK-LABEL: name: test9 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY2]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_LT_I32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp slt i32 %arg1, %arg3 + %cmp2 = icmp slt i32 %arg2, %arg3 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test10(i32 %arg1, i32 %arg2, i32 %arg3) #0 { + ; CHECK-LABEL: name: test10 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY2]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LT_U32_e64_]], killed [[V_CMP_LT_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp ult i32 %arg1, %arg3 + %cmp2 = icmp ult i32 %arg2, %arg3 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test11(i32 %arg1, i32 %arg2, i32 %arg3) #0 { + ; CHECK-LABEL: name: test11 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_LE_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I32_e64 [[COPY2]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LE_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LE_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LE_I32_e64_]], killed [[V_CMP_LE_I32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp sle i32 %arg1, %arg3 + %cmp2 = icmp sle i32 %arg2, %arg3 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test12(i32 %arg1, i32 %arg2, i32 %arg3) #0 { + ; CHECK-LABEL: name: test12 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_LE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U32_e64 [[COPY2]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LE_U32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LE_U32_e64_]], killed [[V_CMP_LE_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp ule i32 %arg1, %arg3 + %cmp2 = icmp ule i32 %arg2, %arg3 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test13(i32 %arg1, i32 %arg2, i32 %arg3) #0 { + ; CHECK-LABEL: name: test13 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY2]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_GT_I32_e64_]], killed [[V_CMP_GT_I32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp sgt i32 %arg1, %arg3 + %cmp2 = icmp sgt i32 %arg2, %arg3 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test14(i32 %arg1, i32 %arg2, i32 %arg3) #0 { + ; CHECK-LABEL: name: test14 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY2]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_GT_U32_e64_]], killed [[V_CMP_GT_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp ugt i32 %arg1, %arg3 + %cmp2 = icmp ugt i32 %arg2, %arg3 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test15(i32 %arg1, i32 %arg2, i32 %arg3) #0 { + ; CHECK-LABEL: name: test15 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_GE_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_I32_e64 [[COPY2]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GE_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GE_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_GE_I32_e64_]], killed [[V_CMP_GE_I32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp sge i32 %arg1, %arg3 + %cmp2 = icmp sge i32 %arg2, %arg3 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test16(i32 %arg1, i32 %arg2, i32 %arg3) #0 { + ; CHECK-LABEL: name: test16 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_GE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[COPY2]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_GE_U32_e64_]], killed [[V_CMP_GE_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp uge i32 %arg1, %arg3 + %cmp2 = icmp uge i32 %arg2, %arg3 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test17(i32 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: test17 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_LT_I32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp slt i32 %arg1, 1000 + %cmp2 = icmp slt i32 %arg2, 1000 + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +define i1 @test18(i32 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: test18 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_LT_U32_e64_]], killed [[V_CMP_LT_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp ult i32 %arg1, 1000 + %cmp2 = icmp ult i32 %arg2, 1000 + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +define i1 @test19(i32 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: test19 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1001 + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_LT_I32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp sle i32 %arg1, 1000 + %cmp2 = icmp sle i32 %arg2, 1000 + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +define i1 @test20(i32 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: test20 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1001 + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_LT_U32_e64_]], killed [[V_CMP_LT_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp ule i32 %arg1, 1000 + %cmp2 = icmp ule i32 %arg2, 1000 + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +define i1 @test21(i32 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: test21 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_GT_I32_e64_]], killed [[V_CMP_GT_I32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp sgt i32 %arg1, 1000 + %cmp2 = icmp sgt i32 %arg2, 1000 + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +define i1 @test22(i32 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: test22 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_GT_U32_e64_]], killed [[V_CMP_GT_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp ugt i32 %arg1, 1000 + %cmp2 = icmp ugt i32 %arg2, 1000 + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +define i1 @test23(i32 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: test23 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999 + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_GT_I32_e64_]], killed [[V_CMP_GT_I32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp sge i32 %arg1, 1000 + %cmp2 = icmp sge i32 %arg2, 1000 + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +define i1 @test24(i32 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: test24 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999 + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_GT_U32_e64_]], killed [[V_CMP_GT_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp uge i32 %arg1, 1000 + %cmp2 = icmp uge i32 %arg2, 1000 + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +define i1 @test25(i32 %arg1, i32 %arg2, i32 %arg3) #0 { + ; CHECK-LABEL: name: test25 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY2]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_LT_I32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp slt i32 %arg1, %arg3 + %cmp2 = icmp slt i32 %arg2, %arg3 + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +define i1 @test26(i32 %arg1, i32 %arg2, i32 %arg3) #0 { + ; CHECK-LABEL: name: test26 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY2]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_LT_U32_e64_]], killed [[V_CMP_LT_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp ult i32 %arg1, %arg3 + %cmp2 = icmp ult i32 %arg2, %arg3 + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +define i1 @test27(i32 %arg1, i32 %arg2, i32 %arg3) #0 { + ; CHECK-LABEL: name: test27 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_LE_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I32_e64 [[COPY2]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LE_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LE_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_LE_I32_e64_]], killed [[V_CMP_LE_I32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp sle i32 %arg1, %arg3 + %cmp2 = icmp sle i32 %arg2, %arg3 + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +define i1 @test28(i32 %arg1, i32 %arg2, i32 %arg3) #0 { + ; CHECK-LABEL: name: test28 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_LE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U32_e64 [[COPY2]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LE_U32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_LE_U32_e64_]], killed [[V_CMP_LE_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp ule i32 %arg1, %arg3 + %cmp2 = icmp ule i32 %arg2, %arg3 + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +define i1 @test29(i32 %arg1, i32 %arg2, i32 %arg3) #0 { + ; CHECK-LABEL: name: test29 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY2]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_GT_I32_e64_]], killed [[V_CMP_GT_I32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp sgt i32 %arg1, %arg3 + %cmp2 = icmp sgt i32 %arg2, %arg3 + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +define i1 @test30(i32 %arg1, i32 %arg2, i32 %arg3) #0 { + ; CHECK-LABEL: name: test30 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY2]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_GT_U32_e64_]], killed [[V_CMP_GT_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp ugt i32 %arg1, %arg3 + %cmp2 = icmp ugt i32 %arg2, %arg3 + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +define i1 @test31(i32 %arg1, i32 %arg2, i32 %arg3) #0 { + ; CHECK-LABEL: name: test31 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_GE_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_I32_e64 [[COPY2]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GE_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GE_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_GE_I32_e64_]], killed [[V_CMP_GE_I32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp sge i32 %arg1, %arg3 + %cmp2 = icmp sge i32 %arg2, %arg3 + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +define i1 @test32(i32 %arg1, i32 %arg2, i32 %arg3) #0 { + ; CHECK-LABEL: name: test32 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_GE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[COPY2]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_GE_U32_e64_]], killed [[V_CMP_GE_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp uge i32 %arg1, %arg3 + %cmp2 = icmp uge i32 %arg2, %arg3 + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +define i1 @test33(i32 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: test33 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY1]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_LT_I32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp slt i32 %arg1, %arg2 + %cmp2 = icmp slt i32 %arg1, 1000 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test34(i32 %arg1, i64 %arg2) #0 { + ; CHECK-LABEL: name: test34 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY2]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, killed [[S_MOV_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE]], killed [[REG_SEQUENCE1]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_LT_I64_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp slt i32 %arg1, 1000 + %cmp2 = icmp slt i64 %arg2, 1000 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test35(i32 %arg1, i64 %arg2) #0 { + ; CHECK-LABEL: name: test35 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY2]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, killed [[S_MOV_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U64_e64 killed [[REG_SEQUENCE]], killed [[REG_SEQUENCE1]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_EQ_U32_e64_]], killed [[V_CMP_EQ_U64_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp eq i32 %arg1, 1000 + %cmp2 = icmp eq i64 %arg2, 1000 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define i1 @test36(i32 %arg1, i64 %arg2) #0 { + ; CHECK-LABEL: name: test36 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 + ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[COPY2]], [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, killed [[S_MOV_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U64_e64 killed [[REG_SEQUENCE]], killed [[REG_SEQUENCE1]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_NE_U32_e64_]], killed [[V_CMP_NE_U64_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %cmp1 = icmp ne i32 %arg1, 1000 + %cmp2 = icmp ne i64 %arg2, 1000 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll --- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll @@ -1,30 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose -disable-block-placement -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone -; SI-LABEL: {{^}}test_if: ; Make sure the i1 values created by the cfg structurizer pass are ; moved using VALU instructions - - +define amdgpu_kernel void @test_if(i32 %b, ptr addrspace(1) %src, ptr addrspace(1) %dst) #1 { +; SI-LABEL: test_if: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_load_dword s8, s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; ; waitcnt should be inserted after exec modification -; SI: v_cmp_lt_i32_e32 vcc, 1, -; SI-NEXT: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0 -; SI-NEXT: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0 -; SI-NEXT: s_and_saveexec_b64 [[SAVE1:s\[[0-9]+:[0-9]+\]]], vcc -; SI-NEXT: s_xor_b64 [[SAVE2:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE1]] -; SI-NEXT: s_cbranch_execz [[FLOW_BB:.LBB[0-9]+_[0-9]+]] - -; SI-NEXT: ; %bb.{{[0-9]+}}: ; %LeafBlock3 -; SI: s_mov_b64 s[{{[0-9]:[0-9]}}], -1 -; SI: s_and_saveexec_b64 -; SI-NEXT: s_cbranch_execnz - -; v_mov should be after exec modification -; SI: [[FLOW_BB]]: -; SI-NEXT: s_andn2_saveexec_b64 [[SAVE2]], [[SAVE2]] +; SI-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0 +; SI-NEXT: s_mov_b64 s[10:11], 0 +; SI-NEXT: s_mov_b64 s[2:3], 0 +; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc +; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; SI-NEXT: s_cbranch_execz .LBB0_3 +; SI-NEXT: ; %bb.1: ; %LeafBlock3 +; SI-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 +; SI-NEXT: s_mov_b64 s[2:3], -1 +; SI-NEXT: s_and_saveexec_b64 s[6:7], vcc +; SI-NEXT: s_cbranch_execnz .LBB0_9 +; SI-NEXT: .LBB0_2: ; %Flow7 +; SI-NEXT: s_or_b64 exec, exec, s[6:7] +; SI-NEXT: s_and_b64 s[2:3], s[2:3], exec ; -define amdgpu_kernel void @test_if(i32 %b, ptr addrspace(1) %src, ptr addrspace(1) %dst) #1 { +; v_mov should be after exec modification +; SI-NEXT: .LBB0_3: ; %Flow6 +; SI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; SI-NEXT: ; %bb.4: ; %LeafBlock +; SI-NEXT: s_mov_b64 s[10:11], exec +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 +; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec +; SI-NEXT: s_and_b64 s[6:7], vcc, exec +; SI-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] +; SI-NEXT: ; %bb.5: ; %Flow8 +; SI-NEXT: s_or_b64 exec, exec, s[4:5] +; SI-NEXT: s_and_saveexec_b64 s[4:5], s[2:3] +; SI-NEXT: s_xor_b64 s[2:3], exec, s[4:5] +; SI-NEXT: s_cbranch_execnz .LBB0_10 +; SI-NEXT: .LBB0_6: ; %Flow9 +; SI-NEXT: s_or_b64 exec, exec, s[2:3] +; SI-NEXT: s_and_saveexec_b64 s[2:3], s[10:11] +; SI-NEXT: s_cbranch_execz .LBB0_8 +; SI-NEXT: ; %bb.7: ; %case1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_ashr_i32 s9, s8, 31 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: s_lshl_b64 s[4:5], s[8:9], 2 +; SI-NEXT: v_mov_b32_e32 v2, 13 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: v_mov_b32_e32 v1, s5 +; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 +; SI-NEXT: .LBB0_8: ; %end +; SI-NEXT: s_endpgm +; SI-NEXT: .LBB0_9: ; %case2 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_ashr_i32 s9, s8, 31 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: s_lshl_b64 s[12:13], s[8:9], 2 +; SI-NEXT: v_mov_b32_e32 v3, 17 +; SI-NEXT: v_mov_b32_e32 v1, s12 +; SI-NEXT: v_mov_b32_e32 v2, s13 +; SI-NEXT: buffer_store_dword v3, v[1:2], s[0:3], 0 addr64 +; SI-NEXT: s_xor_b64 s[2:3], exec, -1 +; SI-NEXT: s_branch .LBB0_2 +; SI-NEXT: .LBB0_10: ; %default +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 2, v0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_ashr_i32 s9, s8, 31 +; SI-NEXT: s_lshl_b64 s[4:5], s[8:9], 2 +; SI-NEXT: s_add_u32 s4, s0, s4 +; SI-NEXT: s_addc_u32 s5, s1, s5 +; SI-NEXT: s_and_saveexec_b64 s[6:7], vcc +; SI-NEXT: s_xor_b64 s[12:13], exec, s[6:7] +; SI-NEXT: s_cbranch_execnz .LBB0_14 +; SI-NEXT: .LBB0_11: ; %Flow +; SI-NEXT: s_andn2_saveexec_b64 s[12:13], s[12:13] +; SI-NEXT: s_cbranch_execz .LBB0_13 +; SI-NEXT: ; %bb.12: ; %if +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, 19 +; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: .LBB0_13: ; %Flow5 +; SI-NEXT: s_or_b64 exec, exec, s[12:13] +; SI-NEXT: s_andn2_b64 s[10:11], s[10:11], exec +; SI-NEXT: s_branch .LBB0_6 +; SI-NEXT: .LBB0_14: ; %else +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: v_mov_b32_e32 v0, 21 +; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: s_branch .LBB0_11 entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone switch i32 %tid, label %default [ @@ -59,17 +133,23 @@ ret void } -; SI-LABEL: {{^}}simple_test_v_if: -; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}} -; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc -; SI-NEXT: s_cbranch_execz [[EXIT:.LBB[0-9]+_[0-9]+]] - -; SI-NEXT: ; %bb.{{[0-9]+}}: -; SI: buffer_store_dword - -; SI-NEXT: {{^}}[[EXIT]]: -; SI: s_endpgm define amdgpu_kernel void @simple_test_v_if(ptr addrspace(1) %dst, ptr addrspace(1) %src) #1 { +; SI-LABEL: simple_test_v_if: +; SI: ; %bb.0: +; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc +; SI-NEXT: s_cbranch_execz .LBB1_2 +; SI-NEXT: ; %bb.1: ; %then +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: v_mov_b32_e32 v2, 0x3e7 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 +; SI-NEXT: .LBB1_2: ; %exit +; SI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %is.0 = icmp ne i32 %tid, 0 br i1 %is.0, label %then, label %exit @@ -84,18 +164,23 @@ } ; FIXME: It would be better to endpgm in the then block. - -; SI-LABEL: {{^}}simple_test_v_if_ret_else_ret: -; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}} -; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc -; SI-NEXT: s_cbranch_execz [[EXIT:.LBB[0-9]+_[0-9]+]] - -; SI-NEXT: ; %bb.{{[0-9]+}}: -; SI: buffer_store_dword - -; SI-NEXT: {{^}}[[EXIT]]: -; SI: s_endpgm define amdgpu_kernel void @simple_test_v_if_ret_else_ret(ptr addrspace(1) %dst, ptr addrspace(1) %src) #1 { +; SI-LABEL: simple_test_v_if_ret_else_ret: +; SI: ; %bb.0: +; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc +; SI-NEXT: s_cbranch_execz .LBB2_2 +; SI-NEXT: ; %bb.1: ; %then +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: v_mov_b32_e32 v2, 0x3e7 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 +; SI-NEXT: .LBB2_2: ; %UnifiedReturnBlock +; SI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %is.0 = icmp ne i32 %tid, 0 br i1 %is.0, label %then, label %exit @@ -112,27 +197,33 @@ ; Final block has more than a ret to execute. This was miscompiled ; before function exit blocks were unified since the endpgm would ; terminate the then wavefront before reaching the store. - -; SI-LABEL: {{^}}simple_test_v_if_ret_else_code_ret: -; SI: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}} -; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc -; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]] -; SI: s_cbranch_execnz [[EXIT:.LBB[0-9]+_[0-9]+]] - -; SI-NEXT: {{^.LBB[0-9]+_[0-9]+}}: ; %Flow -; SI-NEXT: s_andn2_saveexec_b64 [[BR_SREG]], [[BR_SREG]] -; SI-NEXT: s_cbranch_execz [[UNIFIED_RETURN:.LBB[0-9]+_[0-9]+]] - -; SI-NEXT: ; %bb.{{[0-9]+}}: ; %then -; SI: s_waitcnt -; SI-NEXT: buffer_store_dword - -; SI-NEXT: {{^}}[[UNIFIED_RETURN]]: ; %UnifiedReturnBlock -; SI: s_endpgm - -; SI-NEXT: {{^}}[[EXIT]]: -; SI: ds_write_b32 define amdgpu_kernel void @simple_test_v_if_ret_else_code_ret(ptr addrspace(1) %dst, ptr addrspace(1) %src) #1 { +; SI-LABEL: simple_test_v_if_ret_else_code_ret: +; SI: ; %bb.0: +; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc +; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3] +; SI-NEXT: s_cbranch_execnz .LBB3_4 +; SI-NEXT: .LBB3_1: ; %Flow +; SI-NEXT: s_andn2_saveexec_b64 s[2:3], s[2:3] +; SI-NEXT: s_cbranch_execz .LBB3_3 +; SI-NEXT: ; %bb.2: ; %then +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: v_mov_b32_e32 v2, 0x3e7 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 +; SI-NEXT: .LBB3_3: ; %UnifiedReturnBlock +; SI-NEXT: s_endpgm +; SI-NEXT: .LBB3_4: ; %exit +; SI-NEXT: v_mov_b32_e32 v0, 7 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_write_b32 v0, v0 +; SI-NEXT: ; implicit-def: $vgpr0 +; SI-NEXT: s_branch .LBB3_1 %tid = call i32 @llvm.amdgcn.workitem.id.x() %is.0 = icmp ne i32 %tid, 0 br i1 %is.0, label %then, label %exit @@ -147,21 +238,37 @@ ret void } -; SI-LABEL: {{^}}simple_test_v_loop: -; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}} -; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc -; SI-NEXT: s_cbranch_execz [[LABEL_EXIT:.LBB[0-9]+_[0-9]+]] - -; SI: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0{{$}} - -; SI: [[LABEL_LOOP:.LBB[0-9]+_[0-9]+]]: -; SI: buffer_load_dword -; SI-DAG: buffer_store_dword -; SI-DAG: s_cmpk_lg_i32 s{{[0-9]+}}, 0x100 -; SI: s_cbranch_scc1 [[LABEL_LOOP]] -; SI: [[LABEL_EXIT]]: -; SI: s_endpgm define amdgpu_kernel void @simple_test_v_loop(ptr addrspace(1) %dst, ptr addrspace(1) %src) #1 { +; SI-LABEL: simple_test_v_loop: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc +; SI-NEXT: s_cbranch_execz .LBB4_3 +; SI-NEXT: ; %bb.1: ; %loop.preheader +; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 +; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; SI-NEXT: s_mov_b64 s[0:1], 0 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[4:5], s[10:11] +; SI-NEXT: v_mov_b32_e32 v1, s9 +; SI-NEXT: v_add_i32_e32 v0, vcc, s8, v0 +; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: .LBB4_2: ; %loop +; SI-NEXT: ; =>This Inner Loop Header: Depth=1 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: buffer_load_dword v2, off, s[4:7], 0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 +; SI-NEXT: s_add_u32 s0, s0, 4 +; SI-NEXT: s_addc_u32 s1, s1, 0 +; SI-NEXT: s_cmpk_lg_i32 s0, 0x100 +; SI-NEXT: s_cbranch_scc1 .LBB4_2 +; SI-NEXT: .LBB4_3: ; %exit +; SI-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %is.0 = icmp ne i32 %tid, 0 @@ -182,45 +289,86 @@ ret void } -; SI-LABEL: {{^}}multi_vcond_loop: - +define amdgpu_kernel void @multi_vcond_loop(ptr addrspace(1) noalias nocapture %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture readonly %arg2, ptr addrspace(1) noalias nocapture readonly %arg3) #1 { +; SI-LABEL: multi_vcond_loop: +; ; Load loop limit from buffer ; Branch to exit if uniformly not taken -; SI: ; %bb.0: -; SI: buffer_load_dword [[VBOUND:v[0-9]+]] -; SI: v_cmp_lt_i32_e32 vcc -; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]], vcc -; SI-NEXT: s_cbranch_execz [[LABEL_EXIT:.LBB[0-9]+_[0-9]+]] - +; SI: ; %bb.0: ; %bb +; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xf +; SI-NEXT: s_mov_b32 s6, 0 +; SI-NEXT: v_mov_b32_e32 v3, 0 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_load_dword v2, v[2:3], s[4:7], 0 addr64 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2 +; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc +; SI-NEXT: s_cbranch_execz .LBB5_5 +; ; Initialize inner condition to false -; SI: ; %bb.{{[0-9]+}}: ; %bb10.preheader -; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], 0{{$}} - +; SI-NEXT: ; %bb.1: ; %bb10.preheader +; SI-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; SI-NEXT: v_mov_b32_e32 v1, v3 +; SI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; SI-NEXT: v_lshl_b64 v[6:7], v[0:1], 2 +; SI-NEXT: s_mov_b64 s[2:3], 0 +; SI-NEXT: ; implicit-def: $sgpr8_sgpr9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v1, s13 +; SI-NEXT: v_add_i32_e32 v0, vcc, s12, v6 +; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc +; SI-NEXT: v_mov_b32_e32 v5, s1 +; SI-NEXT: v_add_i32_e32 v4, vcc, s0, v6 +; SI-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc +; SI-NEXT: v_mov_b32_e32 v8, s15 +; SI-NEXT: v_add_i32_e32 v6, vcc, s14, v6 +; SI-NEXT: v_addc_u32_e32 v7, vcc, v8, v7, vcc +; SI-NEXT: s_mov_b64 s[10:11], 0 +; ; Clear exec bits for workitems that load -1s -; SI: .L[[LABEL_LOOP:BB[0-9]+_[0-9]+]]: -; SI: buffer_load_dword [[B:v[0-9]+]] -; SI: buffer_load_dword [[A:v[0-9]+]] -; SI-DAG: v_cmp_ne_u32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]] -; SI-DAG: v_cmp_ne_u32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]] -; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]] -; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]] -; SI: s_cbranch_execz [[LABEL_FLOW:.LBB[0-9]+_[0-9]+]] - -; SI: ; %bb.{{[0-9]+}}: ; %bb20 -; SI: buffer_store_dword - -; SI: [[LABEL_FLOW]]: -; SI-NEXT: ; in Loop: Header=[[LABEL_LOOP]] -; SI-NEXT: s_or_b64 exec, exec, [[ORNEG2]] -; SI-NEXT: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], -; SI-NEXT: s_or_b64 [[COND_STATE]], [[TMP1]], [[COND_STATE]] -; SI-NEXT: s_andn2_b64 exec, exec, [[COND_STATE]] -; SI-NEXT: s_cbranch_execnz .L[[LABEL_LOOP]] - -; SI: [[LABEL_EXIT]]: -; SI-NOT: [[COND_STATE]] -; SI: s_endpgm -define amdgpu_kernel void @multi_vcond_loop(ptr addrspace(1) noalias nocapture %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture readonly %arg2, ptr addrspace(1) noalias nocapture readonly %arg3) #1 { +; SI-NEXT: .LBB5_2: ; %bb10 +; SI-NEXT: ; =>This Inner Loop Header: Depth=1 +; SI-NEXT: s_mov_b32 s4, s6 +; SI-NEXT: s_mov_b32 s5, s6 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: buffer_load_dword v8, v[6:7], s[4:7], 0 addr64 +; SI-NEXT: buffer_load_dword v9, v[4:5], s[4:7], 0 addr64 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v8 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cmp_ne_u32_e64 s[0:1], -1, v9 +; SI-NEXT: s_and_b64 s[12:13], vcc, s[0:1] +; SI-NEXT: s_or_b64 s[8:9], s[8:9], exec +; SI-NEXT: s_and_saveexec_b64 s[0:1], s[12:13] +; SI-NEXT: s_cbranch_execz .LBB5_4 +; SI-NEXT: ; %bb.3: ; %bb20 +; SI-NEXT: ; in Loop: Header=BB5_2 Depth=1 +; SI-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; SI-NEXT: s_add_u32 s10, s10, 1 +; SI-NEXT: v_add_i32_e32 v4, vcc, 4, v4 +; SI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; SI-NEXT: v_add_i32_e32 v6, vcc, 4, v6 +; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; SI-NEXT: buffer_store_dword v8, v[0:1], s[4:7], 0 addr64 +; SI-NEXT: s_addc_u32 s11, s11, 0 +; SI-NEXT: v_add_i32_e32 v0, vcc, 4, v0 +; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; SI-NEXT: v_cmp_ge_i64_e32 vcc, s[10:11], v[2:3] +; SI-NEXT: s_andn2_b64 s[4:5], s[8:9], exec +; SI-NEXT: s_and_b64 s[8:9], vcc, exec +; SI-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9] +; SI-NEXT: .LBB5_4: ; %Flow +; SI-NEXT: ; in Loop: Header=BB5_2 Depth=1 +; SI-NEXT: s_or_b64 exec, exec, s[0:1] +; SI-NEXT: s_and_b64 s[0:1], exec, s[8:9] +; SI-NEXT: s_or_b64 s[2:3], s[0:1], s[2:3] +; SI-NEXT: s_andn2_b64 exec, exec, s[2:3] +; SI-NEXT: s_cbranch_execnz .LBB5_2 +; SI-NEXT: .LBB5_5: ; %bb26 +; SI-NEXT: s_endpgm bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tmp4 = sext i32 %tmp to i64