Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -34124,6 +34124,23 @@
   return SDValue();
 }
 
+
+/// Fold a xor(setcc cond, val), 1 --> setcc (inverted(cond), val)
+static SDValue foldXor1SetCC(SDNode *N, SelectionDAG &DAG) {
+  if (N->getOpcode() != ISD::XOR)
+    return SDValue();
+
+  SDValue LHS = N->getOperand(0);
+  auto *RHSC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!RHSC || RHSC->getZExtValue() != 1 || LHS->getOpcode() != X86ISD::SETCC)
+    return SDValue();
+
+  X86::CondCode NewCC = X86::GetOppositeBranchCondition(
+      X86::CondCode(LHS->getConstantOperandVal(0)));
+  SDLoc DL(N);
+  return getSETCC(NewCC, LHS->getOperand(1), DL, DAG);
+}
+
 static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
                           TargetLowering::DAGCombinerInfo &DCI,
                           const X86Subtarget &Subtarget) {
@@ -34133,6 +34150,9 @@
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
+  if (SDValue SetCC = foldXor1SetCC(N, DAG))
+    return SetCC;
+
   if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
     return RV;
 
Index: test/CodeGen/X86/overflow-intrinsic-setcc-fold.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/overflow-intrinsic-setcc-fold.ll
@@ -0,0 +1,151 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=generic -verify-machineinstrs | FileCheck %s
+
+; Check CC optimization of not(overflow): xor with 1 folds to an inverted setcc.
+
+define i1 @saddo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: saddo.not.i32
+; CHECK: addl %esi, %edi
+; CHECK-NEXT: setno %al
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @saddo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: saddo.not.i64
+; CHECK: addq %rsi, %rdi
+; CHECK-NEXT: setno %al
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @uaddo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: uaddo.not.i32
+; CHECK: addl %esi, %edi
+; CHECK-NEXT: setae %al
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @uaddo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: uaddo.not.i64
+; CHECK: addq %rsi, %rdi
+; CHECK-NEXT: setae %al
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @ssubo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: ssubo.not.i32
+; CHECK: cmpl %esi, %edi
+; CHECK-NEXT: setno %al
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @ssubo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: ssubo.not.i64
+; CHECK: cmpq %rsi, %rdi
+; CHECK-NEXT: setno %al
+  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @usubo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: usubo.not.i32
+; CHECK: cmpl %esi, %edi
+; CHECK-NEXT: setae %al
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @usubo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: usubo.not.i64
+; CHECK: cmpq %rsi, %rdi
+; CHECK-NEXT: setae %al
+  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @smulo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: smulo.not.i32
+; CHECK: imull %esi, %edi
+; CHECK-NEXT: setno %al
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: smulo.not.i64
+; CHECK: imulq %rsi, %rdi
+; CHECK-NEXT: setno %al
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @umulo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: umulo.not.i32
+; CHECK: movl %edi, %eax
+; CHECK-NEXT: mull %esi
+; CHECK-NEXT: setno %al
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: umulo.not.i64
+; CHECK: movq %rdi, %rax
+; CHECK-NEXT: mulq %rsi
+; CHECK-NEXT: setno %al
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
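
Note (illustration, not part of the patch): the fold replaces the explicit
materialize-and-flip of the overflow bit with a directly inverted condition
code. A hand-written sketch of the expected codegen for @uaddo.not.i32 above,
assuming typical register allocation (not verbatim llc output):

  ; before this patch:
  addl  %esi, %edi
  setb  %al          ; materialize the carry (unsigned overflow) bit
  xorb  $1, %al      ; flip it
  retq

  ; with this patch:
  addl  %esi, %edi
  setae %al          ; inverted condition selected directly
  retq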