diff --git a/llvm/test/CodeGen/RISCV/addcarry.ll b/llvm/test/CodeGen/RISCV/addcarry.ll
--- a/llvm/test/CodeGen/RISCV/addcarry.ll
+++ b/llvm/test/CodeGen/RISCV/addcarry.ll
@@ -5,6 +5,7 @@
 
 ; Signed fixed point multiplication eventually expands down to an ADDCARRY.
 declare i64 @llvm.smul.fix.i64 (i64, i64, i32)
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
 
 define i64 @addcarry(i64 %x, i64 %y) nounwind {
 ; RISCV32-LABEL: addcarry:
@@ -42,3 +43,38 @@
   %tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 2);
   ret i64 %tmp;
 }
+
+; negative test for combineCarryDiamond(): ADDCARRY not legal
+define { i32, i32, i1 } @addcarry_2x32(i32 %x0, i32 %x1, i32 %y0, i32 %y1) nounwind {
+; RISCV32-LABEL: addcarry_2x32:
+; RISCV32:       # %bb.0:
+; RISCV32-NEXT:    add a3, a1, a3
+; RISCV32-NEXT:    sltu a1, a3, a1
+; RISCV32-NEXT:    add a4, a2, a4
+; RISCV32-NEXT:    sltu a2, a4, a2
+; RISCV32-NEXT:    add a1, a4, a1
+; RISCV32-NEXT:    sltu a4, a1, a4
+; RISCV32-NEXT:    or a2, a2, a4
+; RISCV32-NEXT:    sw a3, 0(a0)
+; RISCV32-NEXT:    sw a1, 4(a0)
+; RISCV32-NEXT:    sb a2, 8(a0)
+; RISCV32-NEXT:    ret
+  %t0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x0, i32 %y0)
+  %s0 = extractvalue { i32, i1 } %t0, 0
+  %k0 = extractvalue { i32, i1 } %t0, 1
+
+  %t1 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x1, i32 %y1)
+  %s1 = extractvalue { i32, i1 } %t1, 0
+  %k1 = extractvalue { i32, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i32
+  %t2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %s1, i32 %zk0)
+  %s2 = extractvalue { i32, i1 } %t2, 0
+  %k2 = extractvalue { i32, i1 } %t2, 1
+  %k = or i1 %k1, %k2
+
+  %r0 = insertvalue { i32, i32, i1 } poison, i32 %s0, 0
+  %r1 = insertvalue { i32, i32, i1 } %r0, i32 %s2, 1
+  %r = insertvalue { i32, i32, i1 } %r1, i1 %k, 2
+  ret { i32, i32, i1 } %r
+}
diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll
--- a/llvm/test/CodeGen/X86/addcarry.ll
+++ b/llvm/test/CodeGen/X86/addcarry.ll
@@ -415,6 +415,231 @@
   ret i128 %sub2
 }
 
+; basic test for combineCarryDiamond()
+define { i64, i64, i1 } @addcarry_2x64(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: addcarry_2x64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    adcq %rcx, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = or i1 %k1, %k2
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with or operands reversed
+define { i64, i64, i1 } @addcarry_2x64_or_reversed(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: addcarry_2x64_or_reversed:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    adcq %rcx, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = or i1 %k2, %k1 ; reverse natural order of operands
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with xor operands reversed
+define { i64, i64, i1 } @addcarry_2x64_xor_reversed(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: addcarry_2x64_xor_reversed:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    adcq %rcx, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = xor i1 %k2, %k1 ; reverse natural order of operands
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with and operands reversed
+define { i64, i64, i1 } @addcarry_2x64_and_reversed(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: addcarry_2x64_and_reversed:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    adcq %rcx, %rsi
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = and i1 %k2, %k1 ; reverse natural order of operands
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with add operands reversed
+define { i64, i64, i1 } @addcarry_2x64_add_reversed(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: addcarry_2x64_add_reversed:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    adcq %rcx, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = add i1 %k2, %k1 ; reverse natural order of operands
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; Here %carryin is considered a valid carry flag for combining into ADDCARRY,
+; although %carryin does not come from any carry-producing instruction.
+define { i64, i1 } @addcarry_fake_carry(i64 %a, i64 %b, i1 %carryin) {
+; CHECK-LABEL: addcarry_fake_carry:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    andb $1, %dl
+; CHECK-NEXT:    addb $-1, %dl
+; CHECK-NEXT:    adcq %rsi, %rax
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    retq
+  %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+  %partial = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zcarryin = zext i1 %carryin to i64
+  %sum = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %partial, i64 %zcarryin)
+  %k2 = extractvalue { i64, i1 } %sum, 1
+
+  %carryout = or i1 %k1, %k2
+
+  %ret = insertvalue { i64, i1 } %sum, i1 %carryout, 1
+  ret { i64, i1 } %ret
+}
+
+; negative test: %carryin does not look like a carry
+define { i64, i1 } @addcarry_carry_not_zext(i64 %a, i64 %b, i64 %carryin) {
+; CHECK-LABEL: addcarry_carry_not_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    addq %rsi, %rax
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    orb %cl, %dl
+; CHECK-NEXT:    retq
+  %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+  %partial = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %sum = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %partial, i64 %carryin)
+  %k2 = extractvalue { i64, i1 } %sum, 1
+
+  %carryout = or i1 %k1, %k2
+
+  %ret = insertvalue { i64, i1 } %sum, i1 %carryout, 1
+  ret { i64, i1 } %ret
+}
+
+; negative test: %carryin does not look like a carry
+define { i64, i1 } @addcarry_carry_not_i1(i64 %a, i64 %b, i8 %carryin) {
+; CHECK-LABEL: addcarry_carry_not_i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
+; CHECK-NEXT:    addq %rsi, %rdi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movzbl %dl, %eax
+; CHECK-NEXT:    addq %rdi, %rax
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    orb %cl, %dl
+; CHECK-NEXT:    retq
+  %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+  %partial = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zcarryin = zext i8 %carryin to i64
+  %sum = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %partial, i64 %zcarryin)
+  %k2 = extractvalue { i64, i1 } %sum, 1
+
+  %carryout = or i1 %k1, %k2
+
+  %ret = insertvalue { i64, i1 } %sum, i1 %carryout, 1
+  ret { i64, i1 } %ret
+}
+
 %struct.U320 = type { [5 x i64] }
 
 define i32 @add_U320_without_i128_add(%struct.U320* nocapture dereferenceable(40) %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) {
diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll
--- a/llvm/test/CodeGen/X86/subcarry.ll
+++ b/llvm/test/CodeGen/X86/subcarry.ll
@@ -187,6 +187,238 @@
   ret i64 %res
 }
 
+; basic test for combineCarryDiamond()
+define { i64, i64, i1 } @subcarry_2x64(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: subcarry_2x64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    subq %rdx, %rax
+; CHECK-NEXT:    sbbq %rcx, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = or i1 %k1, %k2
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with or operands reversed
+define { i64, i64, i1 } @subcarry_2x64_or_reversed(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: subcarry_2x64_or_reversed:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    subq %rdx, %rax
+; CHECK-NEXT:    sbbq %rcx, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = or i1 %k2, %k1 ; reverse natural order of operands
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with xor operands reversed
+define { i64, i64, i1 } @subcarry_2x64_xor_reversed(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: subcarry_2x64_xor_reversed:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    subq %rdx, %rax
+; CHECK-NEXT:    sbbq %rcx, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = xor i1 %k2, %k1 ; reverse natural order of operands
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with and operands reversed
+define { i64, i64, i1 } @subcarry_2x64_and_reversed(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: subcarry_2x64_and_reversed:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    subq %rdx, %rax
+; CHECK-NEXT:    sbbq %rcx, %rsi
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = and i1 %k2, %k1 ; reverse natural order of operands
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with add operands reversed
+define { i64, i64, i1 } @subcarry_2x64_add_reversed(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: subcarry_2x64_add_reversed:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    subq %rdx, %rax
+; CHECK-NEXT:    setb %dil
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    subq %rcx, %rdx
+; CHECK-NEXT:    subq %rdi, %rdx
+; CHECK-NEXT:    setb %dil
+; CHECK-NEXT:    cmpq %rcx, %rsi
+; CHECK-NEXT:    adcb $0, %dil
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = add i1 %k2, %k1 ; reverse natural order of operands
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; Here %carryin is considered a valid carry flag for combining into SUBCARRY,
+; although %carryin does not come from any carry-producing instruction.
+define { i64, i1 } @subcarry_fake_carry(i64 %a, i64 %b, i1 %carryin) {
+; CHECK-LABEL: subcarry_fake_carry:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    andb $1, %dl
+; CHECK-NEXT:    addb $-1, %dl
+; CHECK-NEXT:    sbbq %rsi, %rax
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    retq
+  %t1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+  %partial = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zcarryin = zext i1 %carryin to i64
+  %s = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %partial, i64 %zcarryin)
+  %k2 = extractvalue { i64, i1 } %s, 1
+
+  %carryout = or i1 %k1, %k2
+
+  %ret = insertvalue { i64, i1 } %s, i1 %carryout, 1
+  ret { i64, i1 } %ret
+}
+
+; negative test: %carryin does not look like a carry
+define { i64, i1 } @subcarry_carry_not_zext(i64 %a, i64 %b, i64 %carryin) {
+; CHECK-LABEL: subcarry_carry_not_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    subq %rsi, %rax
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    subq %rdx, %rax
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    orb %cl, %dl
+; CHECK-NEXT:    retq
+  %t1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+  %partial = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %s = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %partial, i64 %carryin)
+  %k2 = extractvalue { i64, i1 } %s, 1
+
+  %carryout = or i1 %k1, %k2
+
+  %ret = insertvalue { i64, i1 } %s, i1 %carryout, 1
+  ret { i64, i1 } %ret
+}
+
+; negative test: %carryin does not look like a carry
+define { i64, i1 } @subcarry_carry_not_i1(i64 %a, i64 %b, i8 %carryin) {
+; CHECK-LABEL: subcarry_carry_not_i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    subq %rsi, %rax
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movzbl %dl, %edx
+; CHECK-NEXT:    subq %rdx, %rax
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    orb %cl, %dl
+; CHECK-NEXT:    retq
+  %t1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+  %partial = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zcarryin = zext i8 %carryin to i64
+  %s = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %partial, i64 %zcarryin)
+  %k2 = extractvalue { i64, i1 } %s, 1
+
+  %carryout = or i1 %k1, %k2
+
+  %ret = insertvalue { i64, i1 } %s, i1 %carryout, 1
+  ret { i64, i1 } %ret
+}
+
 %struct.U320 = type { [5 x i64] }
 
 define i32 @sub_U320_without_i128_or(%struct.U320* nocapture dereferenceable(40) %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) {
@@ -433,3 +665,24 @@
   store i64 %37, i64* %41, align 8
   ret void
 }
+
+define i1 @ult_2x64(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: ult_2x64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq %rdx, %rdi
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    subq %rcx, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    cmpq %rax, %rsi
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    orb %cl, %al
+; CHECK-NEXT:    retq
+  %b0 = icmp ult i64 %x0, %y0
+  %d1 = sub i64 %x1, %y1
+  %b10 = icmp ult i64 %x1, %y1
+  %b0z = zext i1 %b0 to i64
+  %b11 = icmp ult i64 %d1, %b0z
+  %b1 = or i1 %b10, %b11
+  ret i1 %b1
+}