Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -1152,9 +1152,18 @@
 static void replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
                                         Instruction *InsertPt,
                                         Intrinsic::ID IID) {
+  Value *Arg0 = BO->getOperand(0);
+  Value *Arg1 = BO->getOperand(1);
+
+  // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
+  if (BO->getOpcode() == Instruction::Add &&
+      IID == Intrinsic::usub_with_overflow) {
+    assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
+    Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
+  }
+
   IRBuilder<> Builder(InsertPt);
-  Value *MathOV = Builder.CreateBinaryIntrinsic(IID, BO->getOperand(0),
-                                                BO->getOperand(1));
+  Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
   Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
   Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
   BO->replaceAllUsesWith(Math);
@@ -1200,6 +1209,77 @@
   return true;
 }
 
+/// Match an unsigned compare (u<, u>, or == 0) with a subtract of the same
+/// operands (or the canonical add of the negated constant) and replace the
+/// pair with llvm.usub.with.overflow. Returns true if the IR was changed.
+static bool combineToUSubWithOverflow(CmpInst *Cmp, const TargetLowering &TLI,
+                                      const DataLayout &DL, bool &ModifiedDT) {
+  // Canonicalize (A u> B) to (B u< A) to simplify pattern matching.
+  Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
+  ICmpInst::Predicate Pred = Cmp->getPredicate();
+  if (Pred == ICmpInst::ICMP_UGT) {
+    std::swap(A, B);
+    Pred = ICmpInst::ICMP_ULT;
+  }
+  // Convert special-case: (A == 0) is the same as (A u< 1).
+  if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
+    B = ConstantInt::get(B->getType(), 1);
+    Pred = ICmpInst::ICMP_ULT;
+  }
+  if (Pred != ICmpInst::ICMP_ULT)
+    return false;
+
+  // Walk the users of a variable operand of a compare looking for a subtract or
+  // add with that same operand. Also match the 2nd operand of the compare to
+  // the add/sub, but that may be a negated constant operand of an add.
+  Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
+  BinaryOperator *Sub = nullptr;
+  for (User *U : CmpVariableOperand->users()) {
+    // A - B, A u< B --> usubo(A, B)
+    if (match(U, m_Sub(m_Specific(A), m_Specific(B))))
+      Sub = cast<BinaryOperator>(U);
+
+    // A + (-C), A u< C (canonicalized form of (sub A, C))
+    const APInt *CmpC, *AddC;
+    if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
+        match(B, m_APInt(CmpC)) && *AddC == -(*CmpC))
+      Sub = cast<BinaryOperator>(U);
+
+    // FIXME: This is a hack to avoid a later pattern-matching failure for
+    // sum-of-absolute-differences:
+    //   X = B - A
+    //   Y = A - B
+    //   Z = select (A u< B), X, Y
+    if (match(U, m_Sub(m_Specific(B), m_Specific(A))))
+      return false;
+  }
+  if (!Sub)
+    return false;
+
+  // Allow the transform as long as we have an integer type that is not
+  // obviously illegal and unsupported.
+  // TODO: Allow vector types.
+  Type *Ty = Sub->getType();
+  if (!isa<IntegerType>(Ty))
+    return false;
+  EVT CodegenVT = TLI.getValueType(DL, Ty);
+  if (!CodegenVT.isSimple() && TLI.isOperationExpand(ISD::USUBO, CodegenVT))
+    return false;
+
+  // Pattern matched and profitability checked. Check dominance to determine the
+  // insertion point for an intrinsic that replaces the subtract and compare.
+  DominatorTree DT(*Sub->getFunction());
+  bool SubDominates = DT.dominates(Sub, Cmp);
+  if (!SubDominates && !DT.dominates(Cmp, Sub))
+    return false;
+  Instruction *InPt = SubDominates ? cast<Instruction>(Sub)
+                                   : cast<Instruction>(Cmp);
+  replaceMathCmpWithIntrinsic(Sub, Cmp, InPt, Intrinsic::usub_with_overflow);
+  // Reset callers - do not crash by iterating over a dead instruction.
+  ModifiedDT = true;
+  return true;
+}
+
 /// Sink the given CmpInst into user blocks to reduce the number of virtual
 /// registers that must be created and coalesced. This is a clear win except on
 /// targets with multiple condition code registers (PowerPC), where it might
@@ -1266,14 +1346,19 @@
   return MadeChange;
 }
 
-static bool optimizeCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
-                                  const DataLayout &DL) {
+/// Try each compare-specific transform on Cmp in order (sink, uaddo, usubo),
+/// returning true as soon as one of them succeeds.
+static bool optimizeCmp(CmpInst *Cmp, const TargetLowering &TLI,
+                        const DataLayout &DL, bool &ModifiedDT) {
   if (sinkCmpExpression(Cmp, TLI))
     return true;
 
   if (combineToUAddWithOverflow(Cmp, TLI, DL))
     return true;
 
+  if (combineToUSubWithOverflow(Cmp, TLI, DL, ModifiedDT))
+    return true;
+
   return false;
 }
 
@@ -6760,8 +6845,8 @@
     return false;
   }
 
-  if (CmpInst *CI = dyn_cast<CmpInst>(I))
-    if (TLI && optimizeCmpExpression(CI, *TLI, *DL))
+  if (auto *Cmp = dyn_cast<CmpInst>(I))
+    if (TLI && optimizeCmp(Cmp, *TLI, *DL, ModifiedDT))
       return true;
 
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
Index: test/CodeGen/AArch64/cgp-usubo.ll
===================================================================
--- test/CodeGen/AArch64/cgp-usubo.ll
+++ test/CodeGen/AArch64/cgp-usubo.ll
@@ -21,11 +21,9 @@
 define i1 @usubo_ugt_i32(i32 %x, i32 %y, i32* %p) nounwind {
 ; CHECK-LABEL: usubo_ugt_i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w1, w0
-; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: sub w9, w0, w1
-; CHECK-NEXT: mov w0, w8
-; CHECK-NEXT: str w9, [x2]
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: str w8, [x2]
 ; CHECK-NEXT: ret
   %ov = icmp ugt i32 %y, %x
   %s = sub i32 %x, %y
@@ -38,12 +36,11 @@
 define i1 @usubo_ugt_constant_op0_i8(i8 %x, i8* %p) nounwind {
 ; CHECK-LABEL: usubo_ugt_constant_op0_i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: mov w9, #42
-; CHECK-NEXT: cmp w8, #42 // =42
-; CHECK-NEXT: sub w9, w9, w0
-; CHECK-NEXT: cset w0, hi
-; CHECK-NEXT: strb w9, [x1]
+; CHECK-NEXT: mov w8, #42
+; CHECK-NEXT: sub w8, w8, w0, uxtb
+; CHECK-NEXT: tst w8, #0xffffff00
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: strb w8, [x1]
 ; CHECK-NEXT: ret
   %s = sub i8 42, %x
   %ov = icmp ugt i8 %x, 42
@@ -56,12 +53,11 @@
 define i1
@usubo_ult_constant_op0_i16(i16 %x, i16* %p) nounwind { ; CHECK-LABEL: usubo_ult_constant_op0_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: mov w9, #43 -; CHECK-NEXT: cmp w8, #43 // =43 -; CHECK-NEXT: sub w9, w9, w0 -; CHECK-NEXT: cset w0, hi -; CHECK-NEXT: strh w9, [x1] +; CHECK-NEXT: mov w8, #43 +; CHECK-NEXT: sub w8, w8, w0, uxth +; CHECK-NEXT: tst w8, #0xffff0000 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: strh w8, [x1] ; CHECK-NEXT: ret %s = sub i16 43, %x %ov = icmp ult i16 43, %x @@ -75,10 +71,10 @@ ; CHECK-LABEL: usubo_ult_constant_op1_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: cmp w8, #44 // =44 -; CHECK-NEXT: sub w9, w0, #44 // =44 -; CHECK-NEXT: cset w0, lo -; CHECK-NEXT: strh w9, [x1] +; CHECK-NEXT: sub w8, w8, #44 // =44 +; CHECK-NEXT: tst w8, #0xffff0000 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: strh w8, [x1] ; CHECK-NEXT: ret %s = add i16 %x, -44 %ov = icmp ult i16 %x, 44 @@ -90,11 +86,10 @@ ; CHECK-LABEL: usubo_ugt_constant_op1_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: cmp w8, #45 // =45 -; CHECK-NEXT: cset w8, lo -; CHECK-NEXT: sub w9, w0, #45 // =45 -; CHECK-NEXT: mov w0, w8 -; CHECK-NEXT: strb w9, [x1] +; CHECK-NEXT: sub w8, w8, #45 // =45 +; CHECK-NEXT: tst w8, #0xffffff00 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: strb w8, [x1] ; CHECK-NEXT: ret %ov = icmp ugt i8 45, %x %s = add i8 %x, -45 @@ -107,9 +102,8 @@ define i1 @usubo_eq_constant1_op1_i32(i32 %x, i32* %p) nounwind { ; CHECK-LABEL: usubo_eq_constant1_op1_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp w0, #0 // =0 -; CHECK-NEXT: sub w8, w0, #1 // =1 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: subs w8, w0, #1 // =1 +; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret %s = add i32 %x, -1 Index: test/CodeGen/AMDGPU/sad.ll =================================================================== --- test/CodeGen/AMDGPU/sad.ll +++ test/CodeGen/AMDGPU/sad.ll @@ -291,9 +291,11 @@ ret void } +; This is 
matched to usubo in CGP. + ; GCN-LABEL: {{^}}v_sad_u32_mismatched_operands_pat2: ; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} +; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @v_sad_u32_mismatched_operands_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) { %icmp0 = icmp ugt i32 %a, %b Index: test/CodeGen/PowerPC/bdzlr.ll =================================================================== --- test/CodeGen/PowerPC/bdzlr.ll +++ test/CodeGen/PowerPC/bdzlr.ll @@ -54,13 +54,13 @@ ; CHECK: @lua_xmove ; CHECK: bnelr ; CHECK: bnelr -; CHECK: bdzlr +; CHECK: bnelr ; CHECK-NOT: blr ; CHECK-CRB: @lua_xmove ; CHECK-CRB: bclr 12, ; CHECK-CRB: bclr 12, -; CHECK-CRB: bdzlr +; CHECK-CRB: bgtlr 0 ; CHECK-CRB-NOT: blr } Index: test/CodeGen/PowerPC/expand-contiguous-isel.ll =================================================================== --- test/CodeGen/PowerPC/expand-contiguous-isel.ll +++ test/CodeGen/PowerPC/expand-contiguous-isel.ll @@ -137,7 +137,8 @@ ; CHECK: bc 12, eq, [[TRUE:.LBB[0-9]+]] ; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] ; CHECK-NEXT: [[TRUE]] -; CHECK-NEXT: addi {{r[0-9]+}}, {{r[0-9]+}}, 0 +; There may be an extra asm comment line here, so can't use CHECK-NEXT. 
+; CHECK: addi {{r[0-9]+}}, {{r[0-9]+}}, 0 ; CHECK-NEXT: [[SUCCESSOR]] } Index: test/CodeGen/X86/cgp-usubo.ll =================================================================== --- test/CodeGen/X86/cgp-usubo.ll +++ test/CodeGen/X86/cgp-usubo.ll @@ -7,8 +7,8 @@ ; CHECK-LABEL: usubo_ult_i64: ; CHECK: # %bb.0: ; CHECK-NEXT: subq %rsi, %rdi -; CHECK-NEXT: movq %rdi, (%rdx) ; CHECK-NEXT: setb %al +; CHECK-NEXT: movq %rdi, (%rdx) ; CHECK-NEXT: retq %s = sub i64 %x, %y store i64 %s, i64* %p @@ -21,9 +21,8 @@ define i1 @usubo_ugt_i32(i32 %x, i32 %y, i32* %p) nounwind { ; CHECK-LABEL: usubo_ugt_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl %edi, %esi -; CHECK-NEXT: seta %al ; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: setb %al ; CHECK-NEXT: movl %edi, (%rdx) ; CHECK-NEXT: retq %ov = icmp ugt i32 %y, %x @@ -39,8 +38,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: movb $42, %cl ; CHECK-NEXT: subb %dil, %cl -; CHECK-NEXT: cmpb $42, %dil -; CHECK-NEXT: seta %al +; CHECK-NEXT: setb %al ; CHECK-NEXT: movb %cl, (%rsi) ; CHECK-NEXT: retq %s = sub i8 42, %x @@ -54,10 +52,9 @@ define i1 @usubo_ult_constant_op0_i16(i16 %x, i16* %p) nounwind { ; CHECK-LABEL: usubo_ult_constant_op0_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: movl $43, %ecx -; CHECK-NEXT: subl %edi, %ecx -; CHECK-NEXT: cmpw $43, %di -; CHECK-NEXT: seta %al +; CHECK-NEXT: movw $43, %cx +; CHECK-NEXT: subw %di, %cx +; CHECK-NEXT: setb %al ; CHECK-NEXT: movw %cx, (%rsi) ; CHECK-NEXT: retq %s = sub i16 43, %x @@ -71,11 +68,9 @@ define i1 @usubo_ult_constant_op1_i16(i16 %x, i16* %p) nounwind { ; CHECK-LABEL: usubo_ult_constant_op1_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: addl $-44, %ecx -; CHECK-NEXT: cmpw $44, %di +; CHECK-NEXT: subw $44, %di ; CHECK-NEXT: setb %al -; CHECK-NEXT: movw %cx, (%rsi) +; CHECK-NEXT: movw %di, (%rsi) ; CHECK-NEXT: retq %s = add i16 %x, -44 %ov = icmp ult i16 %x, 44 @@ -86,9 +81,8 @@ define i1 @usubo_ugt_constant_op1_i8(i8 %x, i8* %p) nounwind { ; CHECK-LABEL: usubo_ugt_constant_op1_i8: ; 
CHECK: # %bb.0: -; CHECK-NEXT: cmpb $45, %dil +; CHECK-NEXT: subb $45, %dil ; CHECK-NEXT: setb %al -; CHECK-NEXT: addb $-45, %dil ; CHECK-NEXT: movb %dil, (%rsi) ; CHECK-NEXT: retq %ov = icmp ugt i8 45, %x @@ -102,11 +96,9 @@ define i1 @usubo_eq_constant1_op1_i32(i32 %x, i32* %p) nounwind { ; CHECK-LABEL: usubo_eq_constant1_op1_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal -1(%rdi), %ecx -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: sete %al -; CHECK-NEXT: movl %ecx, (%rsi) +; CHECK-NEXT: subl $1, %edi +; CHECK-NEXT: setb %al +; CHECK-NEXT: movl %edi, (%rsi) ; CHECK-NEXT: retq %s = add i32 %x, -1 %ov = icmp eq i32 %x, 0 @@ -124,17 +116,14 @@ ; CHECK-NEXT: testb $1, %cl ; CHECK-NEXT: je .LBB7_2 ; CHECK-NEXT: # %bb.1: # %t -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: subq %rsi, %rax -; CHECK-NEXT: movq %rax, (%rdx) -; CHECK-NEXT: testb $1, %cl -; CHECK-NEXT: je .LBB7_2 -; CHECK-NEXT: # %bb.3: # %end -; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: subq %rsi, %rdi ; CHECK-NEXT: setb %al -; CHECK-NEXT: retq +; CHECK-NEXT: movq %rdi, (%rdx) +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: jne .LBB7_3 ; CHECK-NEXT: .LBB7_2: # %f ; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: .LBB7_3: # %end ; CHECK-NEXT: retq entry: br i1 %cond, label %t, label %f Index: test/CodeGen/X86/lsr-loop-exit-cond.ll =================================================================== --- test/CodeGen/X86/lsr-loop-exit-cond.ll +++ test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -16,11 +16,11 @@ ; GENERIC-NEXT: movl (%rdx), %eax ; GENERIC-NEXT: movl 4(%rdx), %ebx ; GENERIC-NEXT: decl %ecx -; GENERIC-NEXT: leaq 20(%rdx), %r14 +; GENERIC-NEXT: leaq 20(%rdx), %r11 ; GENERIC-NEXT: movq _Te0@{{.*}}(%rip), %r9 ; GENERIC-NEXT: movq _Te1@{{.*}}(%rip), %r8 ; GENERIC-NEXT: movq _Te3@{{.*}}(%rip), %r10 -; GENERIC-NEXT: movq %rcx, %r11 +; GENERIC-NEXT: movq %rcx, %r14 ; GENERIC-NEXT: jmp LBB0_1 ; GENERIC-NEXT: .p2align 4, 0x90 ; GENERIC-NEXT: LBB0_2: ## %bb1 @@ -29,14 
+29,13 @@ ; GENERIC-NEXT: shrl $16, %ebx ; GENERIC-NEXT: movzbl %bl, %ebx ; GENERIC-NEXT: xorl (%r8,%rbx,4), %eax -; GENERIC-NEXT: xorl -4(%r14), %eax +; GENERIC-NEXT: xorl -4(%r11), %eax ; GENERIC-NEXT: shrl $24, %edi ; GENERIC-NEXT: movzbl %bpl, %ebx ; GENERIC-NEXT: movl (%r10,%rbx,4), %ebx ; GENERIC-NEXT: xorl (%r9,%rdi,4), %ebx -; GENERIC-NEXT: xorl (%r14), %ebx -; GENERIC-NEXT: decq %r11 -; GENERIC-NEXT: addq $16, %r14 +; GENERIC-NEXT: xorl (%r11), %ebx +; GENERIC-NEXT: addq $16, %r11 ; GENERIC-NEXT: LBB0_1: ## %bb ; GENERIC-NEXT: ## =>This Inner Loop Header: Depth=1 ; GENERIC-NEXT: movzbl %al, %edi @@ -47,16 +46,16 @@ ; GENERIC-NEXT: movzbl %bpl, %ebp ; GENERIC-NEXT: movl (%r8,%rbp,4), %ebp ; GENERIC-NEXT: xorl (%r9,%rax,4), %ebp -; GENERIC-NEXT: xorl -12(%r14), %ebp +; GENERIC-NEXT: xorl -12(%r11), %ebp ; GENERIC-NEXT: shrl $24, %ebx ; GENERIC-NEXT: movl (%r10,%rdi,4), %edi ; GENERIC-NEXT: xorl (%r9,%rbx,4), %edi -; GENERIC-NEXT: xorl -8(%r14), %edi +; GENERIC-NEXT: xorl -8(%r11), %edi ; GENERIC-NEXT: movl %ebp, %eax ; GENERIC-NEXT: shrl $24, %eax ; GENERIC-NEXT: movl (%r9,%rax,4), %eax -; GENERIC-NEXT: testq %r11, %r11 -; GENERIC-NEXT: jne LBB0_2 +; GENERIC-NEXT: subq $1, %r14 +; GENERIC-NEXT: jae LBB0_2 ; GENERIC-NEXT: ## %bb.3: ## %bb2 ; GENERIC-NEXT: shlq $4, %rcx ; GENERIC-NEXT: andl $-16777216, %eax ## imm = 0xFF000000 @@ -99,27 +98,26 @@ ; ATOM-NEXT: ## kill: def $ecx killed $ecx def $rcx ; ATOM-NEXT: movl (%rdx), %r15d ; ATOM-NEXT: movl 4(%rdx), %eax -; ATOM-NEXT: leaq 20(%rdx), %r14 +; ATOM-NEXT: leaq 20(%rdx), %r11 ; ATOM-NEXT: movq _Te0@{{.*}}(%rip), %r9 ; ATOM-NEXT: movq _Te1@{{.*}}(%rip), %r8 ; ATOM-NEXT: movq _Te3@{{.*}}(%rip), %r10 ; ATOM-NEXT: decl %ecx -; ATOM-NEXT: movq %rcx, %r11 +; ATOM-NEXT: movq %rcx, %r14 ; ATOM-NEXT: jmp LBB0_1 ; ATOM-NEXT: .p2align 4, 0x90 ; ATOM-NEXT: LBB0_2: ## %bb1 ; ATOM-NEXT: ## in Loop: Header=BB0_1 Depth=1 ; ATOM-NEXT: shrl $16, %eax ; ATOM-NEXT: shrl $24, %edi -; ATOM-NEXT: decq %r11 -; ATOM-NEXT: movzbl %al, 
%ebp +; ATOM-NEXT: movzbl %al, %eax +; ATOM-NEXT: xorl (%r8,%rax,4), %r15d ; ATOM-NEXT: movzbl %bl, %eax ; ATOM-NEXT: movl (%r10,%rax,4), %eax -; ATOM-NEXT: xorl (%r8,%rbp,4), %r15d +; ATOM-NEXT: xorl -4(%r11), %r15d ; ATOM-NEXT: xorl (%r9,%rdi,4), %eax -; ATOM-NEXT: xorl -4(%r14), %r15d -; ATOM-NEXT: xorl (%r14), %eax -; ATOM-NEXT: addq $16, %r14 +; ATOM-NEXT: xorl (%r11), %eax +; ATOM-NEXT: addq $16, %r11 ; ATOM-NEXT: LBB0_1: ## %bb ; ATOM-NEXT: ## =>This Inner Loop Header: Depth=1 ; ATOM-NEXT: movl %eax, %edi @@ -132,15 +130,15 @@ ; ATOM-NEXT: movzbl %r15b, %edi ; ATOM-NEXT: xorl (%r9,%rbp,4), %ebx ; ATOM-NEXT: movl (%r10,%rdi,4), %edi -; ATOM-NEXT: xorl -12(%r14), %ebx +; ATOM-NEXT: xorl -12(%r11), %ebx ; ATOM-NEXT: xorl (%r9,%rax,4), %edi ; ATOM-NEXT: movl %ebx, %eax -; ATOM-NEXT: xorl -8(%r14), %edi +; ATOM-NEXT: xorl -8(%r11), %edi ; ATOM-NEXT: shrl $24, %eax ; ATOM-NEXT: movl (%r9,%rax,4), %r15d -; ATOM-NEXT: testq %r11, %r11 +; ATOM-NEXT: subq $1, %r14 ; ATOM-NEXT: movl %edi, %eax -; ATOM-NEXT: jne LBB0_2 +; ATOM-NEXT: jae LBB0_2 ; ATOM-NEXT: ## %bb.3: ## %bb2 ; ATOM-NEXT: shrl $16, %eax ; ATOM-NEXT: shrl $8, %edi Index: test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll =================================================================== --- test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll +++ test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll @@ -175,10 +175,11 @@ define i1 @usubo_ult_i64(i64 %x, i64 %y, i64* %p) { ; CHECK-LABEL: @usubo_ult_i64( -; CHECK-NEXT: [[S:%.*]] = sub i64 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: store i64 [[S]], i64* [[P:%.*]] -; CHECK-NEXT: [[OV:%.*]] = icmp ult i64 [[X]], [[Y]] -; CHECK-NEXT: ret i1 [[OV]] +; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]] +; CHECK-NEXT: ret i1 
[[OV1]] ; %s = sub i64 %x, %y store i64 %s, i64* %p @@ -190,10 +191,11 @@ define i1 @usubo_ugt_i32(i32 %x, i32 %y, i32* %p) { ; CHECK-LABEL: @usubo_ugt_i32( -; CHECK-NEXT: [[OV:%.*]] = icmp ugt i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[S:%.*]] = sub i32 [[X]], [[Y]] -; CHECK-NEXT: store i32 [[S]], i32* [[P:%.*]] -; CHECK-NEXT: ret i1 [[OV]] +; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i32 [[MATH]], i32* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] ; %ov = icmp ugt i32 %y, %x %s = sub i32 %x, %y @@ -205,10 +207,11 @@ define i1 @usubo_ugt_constant_op0_i8(i8 %x, i8* %p) { ; CHECK-LABEL: @usubo_ugt_constant_op0_i8( -; CHECK-NEXT: [[S:%.*]] = sub i8 42, [[X:%.*]] -; CHECK-NEXT: [[OV:%.*]] = icmp ugt i8 [[X]], 42 -; CHECK-NEXT: store i8 [[S]], i8* [[P:%.*]] -; CHECK-NEXT: ret i1 [[OV]] +; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 42, i8 [[X:%.*]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i8, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i8 [[MATH]], i8* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] ; %s = sub i8 42, %x %ov = icmp ugt i8 %x, 42 @@ -220,10 +223,11 @@ define i1 @usubo_ult_constant_op0_i16(i16 %x, i16* %p) { ; CHECK-LABEL: @usubo_ult_constant_op0_i16( -; CHECK-NEXT: [[S:%.*]] = sub i16 43, [[X:%.*]] -; CHECK-NEXT: [[OV:%.*]] = icmp ult i16 43, [[X]] -; CHECK-NEXT: store i16 [[S]], i16* [[P:%.*]] -; CHECK-NEXT: ret i1 [[OV]] +; CHECK-NEXT: [[TMP1:%.*]] = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 43, i16 [[X:%.*]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i16, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i16 [[MATH]], i16* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] ; %s = sub i16 43, %x %ov = icmp ult 
i16 43, %x @@ -235,10 +239,11 @@ define i1 @usubo_ult_constant_op1_i16(i16 %x, i16* %p) { ; CHECK-LABEL: @usubo_ult_constant_op1_i16( -; CHECK-NEXT: [[S:%.*]] = add i16 [[X:%.*]], -44 -; CHECK-NEXT: [[OV:%.*]] = icmp ult i16 [[X]], 44 -; CHECK-NEXT: store i16 [[S]], i16* [[P:%.*]] -; CHECK-NEXT: ret i1 [[OV]] +; CHECK-NEXT: [[TMP1:%.*]] = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 [[X:%.*]], i16 44) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i16, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i16 [[MATH]], i16* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] ; %s = add i16 %x, -44 %ov = icmp ult i16 %x, 44 @@ -248,10 +253,11 @@ define i1 @usubo_ugt_constant_op1_i8(i8 %x, i8* %p) { ; CHECK-LABEL: @usubo_ugt_constant_op1_i8( -; CHECK-NEXT: [[OV:%.*]] = icmp ugt i8 45, [[X:%.*]] -; CHECK-NEXT: [[S:%.*]] = add i8 [[X]], -45 -; CHECK-NEXT: store i8 [[S]], i8* [[P:%.*]] -; CHECK-NEXT: ret i1 [[OV]] +; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[X:%.*]], i8 45) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i8, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i8 [[MATH]], i8* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] ; %ov = icmp ugt i8 45, %x %s = add i8 %x, -45 @@ -263,10 +269,11 @@ define i1 @usubo_eq_constant1_op1_i32(i32 %x, i32* %p) { ; CHECK-LABEL: @usubo_eq_constant1_op1_i32( -; CHECK-NEXT: [[S:%.*]] = add i32 [[X:%.*]], -1 -; CHECK-NEXT: [[OV:%.*]] = icmp eq i32 [[X]], 0 -; CHECK-NEXT: store i32 [[S]], i32* [[P:%.*]] -; CHECK-NEXT: ret i1 [[OV]] +; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[X:%.*]], i32 1) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i32 [[MATH]], i32* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] ; %s = add i32 %x, -1 %ov = icmp eq i32 %x, 0 @@ -283,14 +290,15 @@ ; 
CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] ; CHECK: t: -; CHECK-NEXT: [[S:%.*]] = sub i64 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: store i64 [[S]], i64* [[P:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1 +; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]] ; CHECK-NEXT: br i1 [[COND]], label [[END:%.*]], label [[F]] ; CHECK: f: ; CHECK-NEXT: ret i1 [[COND]] ; CHECK: end: -; CHECK-NEXT: [[OV:%.*]] = icmp ult i64 [[X]], [[Y]] -; CHECK-NEXT: ret i1 [[OV]] +; CHECK-NEXT: ret i1 [[OV1]] ; entry: br i1 %cond, label %t, label %f @@ -319,10 +327,11 @@ ; CHECK: f: ; CHECK-NEXT: ret i1 [[COND]] ; CHECK: end: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[X]], [[Y]] -; CHECK-NEXT: [[S:%.*]] = sub i64 [[X]], [[Y]] -; CHECK-NEXT: store i64 [[S]], i64* [[P:%.*]] -; CHECK-NEXT: ret i1 [[TMP0]] +; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X]], i64 [[Y]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1 +; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] ; entry: br i1 %cond, label %t, label %f