Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -17320,12 +17320,14 @@
   // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
   // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
   // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
+  // (select (and (x , 0x1) == 0), y, (z ^ y) ) -> (-(and (x , 0x1)) & z ) ^ y
+  // (select (and (x , 0x1) == 0), y, (z | y) ) -> (-(and (x , 0x1)) & z ) | y
   if (Cond.getOpcode() == X86ISD::SETCC &&
       Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
       isNullConstant(Cond.getOperand(1).getOperand(1))) {
     SDValue Cmp = Cond.getOperand(1);
-
-    unsigned CondCode =cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
+    unsigned CondCode =
+        cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
 
     if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
         (CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
@@ -17362,6 +17364,43 @@
       if (!isNullConstant(Op2))
         Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
       return Res;
+    } else if (!Subtarget.hasCMov() && CondCode == X86::COND_E &&
+               Cmp.getOperand(0).getOpcode() == ISD::AND &&
+               isOneConstant(Cmp.getOperand(0).getOperand(1))) {
+      SDValue CmpOp0 = Cmp.getOperand(0);
+      SDValue Src1, Src2;
+      // Returns true if Op2 is an XOR or OR node and one of its operands
+      // is equal to Op1; on success Src1 is the other operand, Src2 is Op1.
+      // ( a , a op b) || ( b , a op b)
+      auto isOrXorPattern = [&]() {
+        if ((Op2.getOpcode() == ISD::XOR || Op2.getOpcode() == ISD::OR) &&
+            (Op2.getOperand(0) == Op1 || Op2.getOperand(1) == Op1)) {
+          Src1 =
+              Op2.getOperand(0) == Op1 ? Op2.getOperand(1) : Op2.getOperand(0);
+          Src2 = Op1;
+          return true;
+        }
+        return false;
+      };
+
+      if (isOrXorPattern()) {
+        SDValue Neg;
+        unsigned int CmpSz = CmpOp0.getSimpleValueType().getSizeInBits();
+        // We need a mask of all zeros or all ones with the same size as the
+        // other operands, so bring (and x, 1) to VT before negating it.
+        if (CmpSz > VT.getSizeInBits())
+          Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpOp0);
+        else if (CmpSz < VT.getSizeInBits())
+          Neg = DAG.getNode(ISD::AND, DL, VT,
+              DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpOp0.getOperand(0)),
+              DAG.getConstant(1, DL, VT));
+        else
+          Neg = CmpOp0; // already VT-sized: negate the AND, not the CMP node
+        SDValue Mask = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+                                   Neg); // -(and (x, 0x1))
+        SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
+        return DAG.getNode(Op2.getOpcode(), DL, VT, And, Src2);  // And Op y
+      }
     }
   }
 
Index: llvm/trunk/test/CodeGen/X86/select.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/select.ll
+++ llvm/trunk/test/CodeGen/X86/select.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=generic | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
+; RUN: llc < %s -mtriple=i386-intel-elfiamcu | FileCheck %s --check-prefix=MCU
 
 ; PR5757
 %0 = type { i64, i32 }
@@ -635,3 +636,45 @@
   %B122 = fadd float %Sl59, undef
   ret void
 }
+
+define i16 @select_xor_1(i16 %A, i8 %cond) {
+; CHECK-LABEL: select_xor_1:
+; MCU: andl $1, %edx
+; MCU-NEXT: negl %edx
+; MCU-NEXT: andl $43, %edx
+; MCU-NEXT: xorl %edx, %eax
+entry:
+  %and = and i8 %cond, 1
+  %cmp10 = icmp eq i8 %and, 0
+  %0 = xor i16 %A, 43
+  %1 = select i1 %cmp10, i16 %A, i16 %0
+  ret i16 %1
+}
+
+define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) {
+; CHECK-LABEL: select_xor_2:
+; MCU: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %edx, %ecx
+; MCU-NEXT: xorl %ecx, %eax
+entry:
+  %and = and i8 %cond, 1
+  %cmp10 = icmp eq i8 %and, 0
+  %0 = xor i32 %B, %A
+  %1 = select i1 %cmp10, i32 %A, i32 %0
+  ret i32 %1
+}
+
+define i32 @select_or(i32 %A, i32 %B, i8 %cond) {
+; CHECK-LABEL: select_or:
+; MCU: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %edx, %ecx
+; MCU-NEXT: orl %ecx, %eax
+entry:
+  %and = and i8 %cond, 1
+  %cmp10 = icmp eq i8 %and, 0
+  %0 = or i32 %B, %A
+  %1 = select i1 %cmp10, i32 %A, i32 %0
+  ret i32 %1
+}