diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4261,6 +4261,13 @@ MI.eraseFromParent(); return BB; } + case AMDGPU::V_ADDC_U32_e32: + case AMDGPU::V_SUBB_U32_e32: + case AMDGPU::V_SUBBREV_U32_e32: + // These instructions have an implicit use of vcc which counts towards the + // constant bus limit. + TII->legalizeOperands(MI); + return BB; case AMDGPU::DS_GWS_INIT: case AMDGPU::DS_GWS_SEMA_BR: case AMDGPU::DS_GWS_BARRIER: diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -188,7 +188,7 @@ let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in { def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>, Commutable_REV<revOp#"_e32", !eq(revOp, opName)> { - let usesCustomInserter = !eq(P.NumSrcArgs, 2); + let usesCustomInserter = true; } foreach _ = BoolToList<P.HasExtSDWA>.ret in diff --git a/llvm/test/CodeGen/AMDGPU/uaddo.ll b/llvm/test/CodeGen/AMDGPU/uaddo.ll --- a/llvm/test/CodeGen/AMDGPU/uaddo.ll +++ b/llvm/test/CodeGen/AMDGPU/uaddo.ll @@ -220,10 +220,24 @@ ret void } +; FUNC-LABEL: {{^}}sv_uaddo_i128: +; GCN: v_add +; GCN: v_addc +; GCN: v_addc +; GCN: v_addc +define amdgpu_cs void @sv_uaddo_i128(i32 addrspace(1)* %out, i128 inreg %a, i128 %b) { + %uadd = call { i128, i1 } @llvm.uadd.with.overflow.i128(i128 %a, i128 %b) + %carry = extractvalue { i128, i1 } %uadd, 1 + %carry.ext = zext i1 %carry to i32 + store i32 %carry.ext, i32 addrspace(1)* %out + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16) #1 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1 declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #1 +declare { i128, i1 } @llvm.uadd.with.overflow.i128(i128, i128) #1 declare { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 
x i32>) nounwind readnone