diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -992,11 +992,18 @@
     (i64 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_64))
   >;
 
-  let WaveSizePredicate = isWave32 in
-  def : GCNPat <
-    (i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
-    (i32 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_32))
-  >;
+  let WaveSizePredicate = isWave32 in {
+    def : GCNPat <
+      (i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
+      (i32 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_32))
+    >;
+
+    // i64 setcc in wave32: the compare mask fills sub0; sub1 must be 0, not undef.
+    def : GCNPat <
+      (i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
+      (i64 (REG_SEQUENCE SReg_64, (inst $src0, $src1), sub0, (S_MOV_B32 (i32 0)), sub1))
+    >;
+  }
 }
 
 defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
@@ -1056,13 +1063,22 @@
                            DSTCLAMP.NONE), SReg_64))
   >;
 
-  let WaveSizePredicate = isWave32 in
-  def : GCNPat <
-    (i32 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
-                 (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
-    (i32 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
-                           DSTCLAMP.NONE), SReg_32))
-  >;
+  let WaveSizePredicate = isWave32 in {
+    def : GCNPat <
+      (i32 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
+                        (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
+      (i32 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
+                              DSTCLAMP.NONE), SReg_32))
+    >;
+
+    // i64 setcc in wave32: the compare mask fills sub0; sub1 must be 0, not undef.
+    def : GCNPat <
+      (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
+                        (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
+      (i64 (REG_SEQUENCE SReg_64, (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
+                                   DSTCLAMP.NONE), sub0, (S_MOV_B32 (i32 0)), sub1))
+    >;
+  }
 }
 
 defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck %s
+
+declare i64 @llvm.amdgcn.ballot.i64(i1)
+declare i64 @llvm.ctpop.i64(i64)
+
+; ballot(false): both halves of the i64 result (s0 and s1) are expected to be 0.
+
+define amdgpu_cs i64 @constant_false() {
+; CHECK-LABEL: constant_false:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    s_mov_b32 s1, 0
+; CHECK-NEXT:    ; return to shader part epilog
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 0)
+  ret i64 %ballot
+}
+
+; ballot(true): the i64 result is expected to be copied from exec_lo / exec_hi.
+
+define amdgpu_cs i64 @constant_true() {
+; CHECK-LABEL: constant_true:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_mov_b32 s0, exec_lo
+; CHECK-NEXT:    s_mov_b32 s1, exec_hi
+; CHECK-NEXT:    ; return to shader part epilog
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 1)
+  ret i64 %ballot
+}
+
+; Ballot of an i1 that is not a compare: bit 0 is masked off, then tested against 0.
+
+define amdgpu_cs i64 @non_compare(i32 %x) {
+; CHECK-LABEL: non_compare:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
+; CHECK-NEXT:    ; return to shader part epilog
+  %trunc = trunc i32 %x to i1
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %trunc)
+  ret i64 %ballot
+}
+
+; Ballot of comparisons: each case is expected to select one VOP3 compare writing s0.
+
+define amdgpu_cs i64 @compare_ints(i32 %x, i32 %y) {
+; CHECK-LABEL: compare_ints:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s0, v0, v1
+; CHECK-NEXT:    ; return to shader part epilog
+  %cmp = icmp eq i32 %x, %y
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
+  ret i64 %ballot
+}
+
+define amdgpu_cs i64 @compare_int_with_constant(i32 %x) {
+; CHECK-LABEL: compare_int_with_constant:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
+; CHECK-NEXT:    ; return to shader part epilog
+  %cmp = icmp sge i32 %x, 99 ; x >= 99 shows up in the expected asm as 98 (0x62) < x
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
+  ret i64 %ballot
+}
+
+define amdgpu_cs i64 @compare_floats(float %x, float %y) {
+; CHECK-LABEL: compare_floats:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_gt_f32_e64 s0, v0, v1
+; CHECK-NEXT:    ; return to shader part epilog
+  %cmp = fcmp ogt float %x, %y ; ordered greater-than; expected as a single v_cmp_gt_f32
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
+  ret i64 %ballot
+}
+
+define amdgpu_cs i64 @ctpop_of_ballot(float %x, float %y) {
+; CHECK-LABEL: ctpop_of_ballot:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_gt_f32_e64 s0, v0, v1
+; CHECK-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
+; CHECK-NEXT:    s_mov_b32 s1, 0
+; CHECK-NEXT:    ; return to shader part epilog
+  %cmp = fcmp ogt float %x, %y
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
+  %bcnt = call i64 @llvm.ctpop.i64(i64 %ballot) ; NOTE(review): expected s_bcnt1 reads s[0:1] but nothing before it writes s1 - confirm the high half is defined
+  ret i64 %bcnt
+}