Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -319,7 +319,7 @@
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32,   Legal);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Legal);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Custom);
   setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
   setOperationAction(ISD::FREM             , MVT::f32  , Expand);
   setOperationAction(ISD::FREM             , MVT::f64  , Expand);
@@ -16330,6 +16330,32 @@
   return DAG.getNode(X86ISD::VTRUNC, dl, VT, V);
 }

+static SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) {
+  SDValue N0 = Op.getOperand(0);
+  EVT VT = Op.getValueType();
+  EVT SrcVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+  SDLoc DL(Op);
+
+  // An in-register sign-extend of a boolean is a negation:
+  // 'true' (1) sign-extended is -1.
+  // 'false' (0) sign-extended is 0.
+  //
+  // However, we must mask the high bits of the source operand because the
+  // SIGN_EXTEND_INREG does not guarantee that the high bits are already zero.
+  // The mask op can be optimized away by the combiner if it can determine that
+  // the top bits are already zero.
+  //
+  // TODO: Almost all targets would benefit from this transform, so if we can
+  // move it to common codegen with a target hook, that would be a better
+  // general-purpose solution.
+  if (SrcVT.getScalarSizeInBits() == 1) {
+    SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, DAG.getConstant(1, DL, VT));
+    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), And);
+  }
+
+  return SDValue();
+}
+
 static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
                                              const X86Subtarget &Subtarget,
                                              SelectionDAG &DAG) {
@@ -22034,6 +22060,7 @@
   case ISD::ZERO_EXTEND:        return LowerZERO_EXTEND(Op, Subtarget, DAG);
   case ISD::SIGN_EXTEND:        return LowerSIGN_EXTEND(Op, Subtarget, DAG);
   case ISD::ANY_EXTEND:         return LowerANY_EXTEND(Op, Subtarget, DAG);
+  case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
   case ISD::SIGN_EXTEND_VECTOR_INREG:
     return LowerSIGN_EXTEND_VECTOR_INREG(Op, Subtarget, DAG);
   case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
Index: test/CodeGen/X86/negate-i1.ll
===================================================================
--- test/CodeGen/X86/negate-i1.ll
+++ test/CodeGen/X86/negate-i1.ll
@@ -2,19 +2,22 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
 ; RUN: llc < %s -mtriple=i386-unknown-unknown | FileCheck %s --check-prefix=X32

+; In all cases, if 'zeroext' is not specified on the param, then we must mask the
+; value before negation. If 'zeroext' is specified, the 'and' is optimized away.
+
 define i8 @select_i8_neg1_or_0(i1 %a) {
 ; X64-LABEL: select_i8_neg1_or_0:
 ; X64:       # BB#0:
-; X64-NEXT:    shlb $7, %dil
-; X64-NEXT:    sarb $7, %dil
+; X64-NEXT:    andb $1, %dil
+; X64-NEXT:    negb %dil
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: select_i8_neg1_or_0:
 ; X32:       # BB#0:
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    shlb $7, %al
-; X32-NEXT:    sarb $7, %al
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    negb %al
 ; X32-NEXT:    retl
 ;
   %b = sext i1 %a to i8
@@ -24,16 +27,14 @@
 define i8 @select_i8_neg1_or_0_zeroext(i1 zeroext %a) {
 ; X64-LABEL: select_i8_neg1_or_0_zeroext:
 ; X64:       # BB#0:
-; X64-NEXT:    shlb $7, %dil
-; X64-NEXT:    sarb $7, %dil
+; X64-NEXT:    negb %dil
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: select_i8_neg1_or_0_zeroext:
 ; X32:       # BB#0:
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    shlb $7, %al
-; X32-NEXT:    sarb $7, %al
+; X32-NEXT:    negb %al
 ; X32-NEXT:    retl
 ;
   %b = sext i1 %a to i8
@@ -43,16 +44,16 @@
 define i16 @select_i16_neg1_or_0(i1 %a) {
 ; X64-LABEL: select_i16_neg1_or_0:
 ; X64:       # BB#0:
-; X64-NEXT:    shll $15, %edi
-; X64-NEXT:    sarw $15, %di
+; X64-NEXT:    andl $1, %edi
+; X64-NEXT:    negl %edi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: select_i16_neg1_or_0:
 ; X32:       # BB#0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $15, %eax
-; X32-NEXT:    sarw $15, %ax
+; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    andl $1, %eax
+; X32-NEXT:    negl %eax
 ; X32-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X32-NEXT:    retl
 ;
@@ -64,16 +65,14 @@
 ; X64-LABEL: select_i16_neg1_or_0_zeroext:
 ; X64:       # BB#0:
 ; X64-NEXT:    movzbl %dil, %eax
-; X64-NEXT:    shll $15, %eax
-; X64-NEXT:    sarw $15, %ax
+; X64-NEXT:    negl %eax
 ; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: select_i16_neg1_or_0_zeroext:
 ; X32:       # BB#0:
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $15, %eax
-; X32-NEXT:    sarw $15, %ax
+; X32-NEXT:    negl %eax
 ; X32-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X32-NEXT:    retl
 ;
@@ -84,16 +83,16 @@
 define i32 @select_i32_neg1_or_0(i1 %a) {
 ; X64-LABEL: select_i32_neg1_or_0:
 ; X64:       # BB#0:
-; X64-NEXT:    shll $31, %edi
-; X64-NEXT:    sarl $31, %edi
+; X64-NEXT:    andl $1, %edi
+; X64-NEXT:    negl %edi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: select_i32_neg1_or_0:
 ; X32:       # BB#0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $31, %eax
-; X32-NEXT:    sarl $31, %eax
+; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    andl $1, %eax
+; X32-NEXT:    negl %eax
 ; X32-NEXT:    retl
 ;
   %b = sext i1 %a to i32
@@ -104,15 +103,13 @@
 ; X64-LABEL: select_i32_neg1_or_0_zeroext:
 ; X64:       # BB#0:
 ; X64-NEXT:    movzbl %dil, %eax
-; X64-NEXT:    shll $31, %eax
-; X64-NEXT:    sarl $31, %eax
+; X64-NEXT:    negl %eax
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: select_i32_neg1_or_0_zeroext:
 ; X32:       # BB#0:
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $31, %eax
-; X32-NEXT:    sarl $31, %eax
+; X32-NEXT:    negl %eax
 ; X32-NEXT:    retl
 ;
   %b = sext i1 %a to i32
@@ -123,17 +120,18 @@
 ; X64-LABEL: select_i64_neg1_or_0:
 ; X64:       # BB#0:
 ; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    shlq $63, %rdi
-; X64-NEXT:    sarq $63, %rdi
+; X64-NEXT:    andl $1, %edi
+; X64-NEXT:    negq %rdi
 ; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: select_i64_neg1_or_0:
 ; X32:       # BB#0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $31, %eax
-; X32-NEXT:    sarl $31, %eax
+; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    andl $1, %eax
+; X32-NEXT:    negl %eax
 ; X32-NEXT:    movl %eax, %edx
+; X32-NEXT:    sarl $31, %edx
 ; X32-NEXT:    retl
 ;
   %b = sext i1 %a to i64
@@ -144,16 +142,15 @@
 ; X64-LABEL: select_i64_neg1_or_0_zeroext:
 ; X64:       # BB#0:
 ; X64-NEXT:    movzbl %dil, %eax
-; X64-NEXT:    shlq $63, %rax
-; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    negq %rax
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: select_i64_neg1_or_0_zeroext:
 ; X32:       # BB#0:
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $31, %eax
-; X32-NEXT:    sarl $31, %eax
+; X32-NEXT:    negl %eax
 ; X32-NEXT:    movl %eax, %edx
+; X32-NEXT:    sarl $31, %edx
 ; X32-NEXT:    retl
 ;
   %b = sext i1 %a to i64
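A worked example of the lowering above, restating the i32 case from negate-i1.ll (the instruction comments are added here for illustration and are not part of the patch). Type legalization turns 'sext i1 %a to i32' into a SIGN_EXTEND_INREG with an i1 source VT, which the new hook rewrites as (sext_inreg x, i1) --> (sub 0, (and x, 1)). For

  define i32 @select_i32_neg1_or_0(i1 %a) {
    %b = sext i1 %a to i32
    ret i32 %b
  }

x86-64 now emits

  andl $1, %edi    # mask: the high bits of an i1 param are not guaranteed zero
  negl %edi        # 0 - 1 = -1 (all ones); 0 - 0 = 0
  movl %edi, %eax
  retq

instead of the old shll $31 / sarl $31 pair, matching sext semantics: true (1) becomes -1 and false (0) stays 0.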
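For the 'zeroext' variants, the caller has already zeroed the high bits, so the combiner can prove the mask redundant and only the negate remains; e.g. select_i32_neg1_or_0_zeroext on x86-64 reduces to

  movzbl %dil, %eax
  negl %eax
  retq

as the updated checks show. In the i64 tests on 32-bit targets, note the extra 'sarl $31, %edx' in the new output: after the low half is negated in %eax, the high half in %edx is formed by copying %eax and arithmetic-shifting its sign bit across the register.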