Index: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -4302,7 +4302,109 @@ case ISD::XOR: if (tryShrinkShlLogicImm(Node)) return; - break; + + LLVM_FALLTHROUGH; + case ISD::ADD: + case ISD::SUB: { + // Try to avoid folding immediates with multiple uses for optsize. + // This code tries to select to register form directly to avoid going + // through the isel table which might fold the immediate. We can't change + // the patterns on the add/sub/and/or/xor with immediate patterns in the + // tablegen files to check immediate use count without making the patterns + // unavailable to the fast-isel table. + if (!OptForSize) + break; + + // Only handle i8/i16/i32/i64. + if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64) + break; + + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1); + if (!Cst) + break; + + int64_t Val = Cst->getSExtValue(); + + // Make sure it's an immediate that is considered foldable. + // FIXME: Handle unsigned 32-bit immediates for 64-bit AND. + if (!isInt<8>(Val) && !isInt<32>(Val)) + break; + + // Check if we should avoid folding this immediate. + if (!shouldAvoidImmediateInstFormsForSize(N1.getNode())) + break; + + // We should not fold the immediate. So we need a register form instead. 
+ unsigned ROpc, MOpc; + switch (NVT.SimpleTy) { + default: llvm_unreachable("Unexpected VT!"); + case MVT::i8: + switch (Opcode) { + default: llvm_unreachable("Unexpected opcode!"); + case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break; + case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break; + case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break; + case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm; break; + case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break; + } + break; + case MVT::i16: + switch (Opcode) { + default: llvm_unreachable("Unexpected opcode!"); + case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break; + case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break; + case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break; + case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm; break; + case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break; + } + break; + case MVT::i32: + switch (Opcode) { + default: llvm_unreachable("Unexpected opcode!"); + case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break; + case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break; + case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break; + case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm; break; + case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break; + } + break; + case MVT::i64: + switch (Opcode) { + default: llvm_unreachable("Unexpected opcode!"); + case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break; + case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break; + case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break; + case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm; break; + case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break; + } + break; + } + + // OK, this is an AND/OR/XOR/ADD/SUB with a constant. + + // If this is not a subtract, we can still try to fold a load. 
+ if (Opcode != ISD::SUB) { + SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; + if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { + SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; + SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); + MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); + // Update the chain. + ReplaceUses(N0.getValue(1), SDValue(CNode, 2)); + // Record the mem-refs + CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()}); + ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); + CurDAG->RemoveDeadNode(Node); + return; + } + } + + CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1); + return; + } case X86ISD::SMUL: // i16/i32/i64 are handled with isel patterns. Index: llvm/trunk/test/CodeGen/X86/popcnt.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/popcnt.ll +++ llvm/trunk/test/CodeGen/X86/popcnt.ll @@ -613,11 +613,12 @@ ; X32-NEXT: shrl %ecx ; X32-NEXT: andl $1431655765, %ecx # imm = 0x55555555 ; X32-NEXT: subl %ecx, %eax -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X32-NEXT: movl $858993459, %ecx # imm = 0x33333333 +; X32-NEXT: movl %eax, %edx +; X32-NEXT: andl %ecx, %edx ; X32-NEXT: shrl $2, %eax -; X32-NEXT: andl $858993459, %eax # imm = 0x33333333 -; X32-NEXT: addl %ecx, %eax +; X32-NEXT: andl %ecx, %eax +; X32-NEXT: addl %edx, %eax ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $4, %ecx ; X32-NEXT: addl %eax, %ecx @@ -632,11 +633,12 @@ ; X64-NEXT: shrl %eax ; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555 ; X64-NEXT: subl %eax, %edi -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $858993459, %eax # imm = 0x33333333 +; X64-NEXT: movl $858993459, %eax # imm = 0x33333333 +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: andl %eax, %ecx ; X64-NEXT: shrl $2, %edi -; X64-NEXT: andl $858993459, %edi # imm = 0x33333333 -; X64-NEXT: addl %eax, %edi +; X64-NEXT: andl %eax, %edi +; X64-NEXT: addl %ecx, %edi ; 
X64-NEXT: movl %edi, %eax ; X64-NEXT: shrl $4, %eax ; X64-NEXT: addl %edi, %eax @@ -661,40 +663,49 @@ define i64 @cnt64_optsize(i64 %x) nounwind readnone optsize { ; X32-NOSSE-LABEL: cnt64_optsize: ; X32-NOSSE: # %bb.0: +; X32-NOSSE-NEXT: pushl %ebx +; X32-NOSSE-NEXT: pushl %edi +; X32-NOSSE-NEXT: pushl %esi ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NOSSE-NEXT: movl %ecx, %edx ; X32-NOSSE-NEXT: shrl %edx -; X32-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X32-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555 +; X32-NOSSE-NEXT: andl %esi, %edx ; X32-NOSSE-NEXT: subl %edx, %ecx -; X32-NOSSE-NEXT: movl %ecx, %edx -; X32-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 +; X32-NOSSE-NEXT: movl $858993459, %edx # imm = 0x33333333 +; X32-NOSSE-NEXT: movl %ecx, %edi +; X32-NOSSE-NEXT: andl %edx, %edi ; X32-NOSSE-NEXT: shrl $2, %ecx -; X32-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333 -; X32-NOSSE-NEXT: addl %edx, %ecx -; X32-NOSSE-NEXT: movl %ecx, %edx -; X32-NOSSE-NEXT: shrl $4, %edx -; X32-NOSSE-NEXT: addl %ecx, %edx -; X32-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F -; X32-NOSSE-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101 -; X32-NOSSE-NEXT: shrl $24, %ecx -; X32-NOSSE-NEXT: movl %eax, %edx -; X32-NOSSE-NEXT: shrl %edx -; X32-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555 -; X32-NOSSE-NEXT: subl %edx, %eax -; X32-NOSSE-NEXT: movl %eax, %edx -; X32-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 +; X32-NOSSE-NEXT: andl %edx, %ecx +; X32-NOSSE-NEXT: addl %edi, %ecx +; X32-NOSSE-NEXT: movl %ecx, %edi +; X32-NOSSE-NEXT: shrl $4, %edi +; X32-NOSSE-NEXT: addl %ecx, %edi +; X32-NOSSE-NEXT: movl $252645135, %ecx # imm = 0xF0F0F0F +; X32-NOSSE-NEXT: andl %ecx, %edi +; X32-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %edi +; X32-NOSSE-NEXT: movl %eax, %ebx +; X32-NOSSE-NEXT: shrl %ebx +; X32-NOSSE-NEXT: andl %esi, %ebx +; 
X32-NOSSE-NEXT: subl %ebx, %eax +; X32-NOSSE-NEXT: movl %eax, %esi +; X32-NOSSE-NEXT: andl %edx, %esi ; X32-NOSSE-NEXT: shrl $2, %eax -; X32-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333 -; X32-NOSSE-NEXT: addl %edx, %eax +; X32-NOSSE-NEXT: andl %edx, %eax +; X32-NOSSE-NEXT: addl %esi, %eax ; X32-NOSSE-NEXT: movl %eax, %edx ; X32-NOSSE-NEXT: shrl $4, %edx ; X32-NOSSE-NEXT: addl %eax, %edx -; X32-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F +; X32-NOSSE-NEXT: andl %ecx, %edx ; X32-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101 ; X32-NOSSE-NEXT: shrl $24, %eax -; X32-NOSSE-NEXT: addl %ecx, %eax +; X32-NOSSE-NEXT: addl %edi, %eax ; X32-NOSSE-NEXT: xorl %edx, %edx +; X32-NOSSE-NEXT: popl %esi +; X32-NOSSE-NEXT: popl %edi +; X32-NOSSE-NEXT: popl %ebx ; X32-NOSSE-NEXT: retl ; ; X64-LABEL: cnt64_optsize: @@ -781,77 +792,83 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize { ; X32-NOSSE-LABEL: cnt128_optsize: ; X32-NOSSE: # %bb.0: +; X32-NOSSE-NEXT: pushl %ebp ; X32-NOSSE-NEXT: pushl %ebx ; X32-NOSSE-NEXT: pushl %edi ; X32-NOSSE-NEXT: pushl %esi -; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NOSSE-NEXT: movl %edi, %ebx +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NOSSE-NEXT: movl %ebx, %ecx +; X32-NOSSE-NEXT: shrl %ecx +; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 +; X32-NOSSE-NEXT: andl %edi, %ecx +; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 +; X32-NOSSE-NEXT: subl %ecx, %ebx +; X32-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333 +; X32-NOSSE-NEXT: movl %ebx, %ebp +; X32-NOSSE-NEXT: andl %ecx, %ebp +; X32-NOSSE-NEXT: shrl $2, %ebx +; X32-NOSSE-NEXT: andl %ecx, %ebx +; X32-NOSSE-NEXT: addl %ebp, %ebx +; X32-NOSSE-NEXT: movl %ebx, %ebp +; X32-NOSSE-NEXT: 
shrl $4, %ebp +; X32-NOSSE-NEXT: addl %ebx, %ebp +; X32-NOSSE-NEXT: movl %eax, %ebx ; X32-NOSSE-NEXT: shrl %ebx -; X32-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555 -; X32-NOSSE-NEXT: subl %ebx, %edi -; X32-NOSSE-NEXT: movl %edi, %ebx -; X32-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333 -; X32-NOSSE-NEXT: shrl $2, %edi -; X32-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 -; X32-NOSSE-NEXT: addl %ebx, %edi -; X32-NOSSE-NEXT: movl %edi, %ebx -; X32-NOSSE-NEXT: shrl $4, %ebx -; X32-NOSSE-NEXT: addl %edi, %ebx -; X32-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F -; X32-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101 +; X32-NOSSE-NEXT: andl %edi, %ebx +; X32-NOSSE-NEXT: subl %ebx, %eax +; X32-NOSSE-NEXT: movl %eax, %ebx +; X32-NOSSE-NEXT: andl %ecx, %ebx +; X32-NOSSE-NEXT: shrl $2, %eax +; X32-NOSSE-NEXT: andl %ecx, %eax +; X32-NOSSE-NEXT: addl %ebx, %eax +; X32-NOSSE-NEXT: movl %eax, %edi +; X32-NOSSE-NEXT: shrl $4, %edi +; X32-NOSSE-NEXT: addl %eax, %edi +; X32-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F +; X32-NOSSE-NEXT: andl %ebx, %ebp +; X32-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %eax +; X32-NOSSE-NEXT: andl %ebx, %edi +; X32-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101 ; X32-NOSSE-NEXT: shrl $24, %edi -; X32-NOSSE-NEXT: movl %esi, %ebx -; X32-NOSSE-NEXT: shrl %ebx -; X32-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555 -; X32-NOSSE-NEXT: subl %ebx, %esi -; X32-NOSSE-NEXT: movl %esi, %ebx -; X32-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333 +; X32-NOSSE-NEXT: addl %eax, %edi +; X32-NOSSE-NEXT: movl %esi, %eax +; X32-NOSSE-NEXT: shrl %eax +; X32-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555 +; X32-NOSSE-NEXT: andl %ebp, %eax +; X32-NOSSE-NEXT: subl %eax, %esi +; X32-NOSSE-NEXT: movl %esi, %eax +; X32-NOSSE-NEXT: andl %ecx, %eax ; X32-NOSSE-NEXT: shrl $2, %esi -; X32-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333 -; X32-NOSSE-NEXT: 
addl %ebx, %esi -; X32-NOSSE-NEXT: movl %esi, %ebx -; X32-NOSSE-NEXT: shrl $4, %ebx -; X32-NOSSE-NEXT: addl %esi, %ebx -; X32-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F -; X32-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101 -; X32-NOSSE-NEXT: shrl $24, %esi -; X32-NOSSE-NEXT: addl %edi, %esi -; X32-NOSSE-NEXT: movl %edx, %edi -; X32-NOSSE-NEXT: shrl %edi -; X32-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; X32-NOSSE-NEXT: subl %edi, %edx -; X32-NOSSE-NEXT: movl %edx, %edi -; X32-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 +; X32-NOSSE-NEXT: andl %ecx, %esi +; X32-NOSSE-NEXT: addl %eax, %esi +; X32-NOSSE-NEXT: movl %esi, %eax +; X32-NOSSE-NEXT: shrl $4, %eax +; X32-NOSSE-NEXT: addl %esi, %eax +; X32-NOSSE-NEXT: movl %edx, %esi +; X32-NOSSE-NEXT: shrl %esi +; X32-NOSSE-NEXT: andl %ebp, %esi +; X32-NOSSE-NEXT: subl %esi, %edx +; X32-NOSSE-NEXT: movl %edx, %esi +; X32-NOSSE-NEXT: andl %ecx, %esi ; X32-NOSSE-NEXT: shrl $2, %edx -; X32-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 -; X32-NOSSE-NEXT: addl %edi, %edx -; X32-NOSSE-NEXT: movl %edx, %edi -; X32-NOSSE-NEXT: shrl $4, %edi -; X32-NOSSE-NEXT: addl %edx, %edi -; X32-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; X32-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101 -; X32-NOSSE-NEXT: shrl $24, %edx -; X32-NOSSE-NEXT: movl %ecx, %edi -; X32-NOSSE-NEXT: shrl %edi -; X32-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; X32-NOSSE-NEXT: subl %edi, %ecx -; X32-NOSSE-NEXT: movl %ecx, %edi -; X32-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 -; X32-NOSSE-NEXT: shrl $2, %ecx -; X32-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333 -; X32-NOSSE-NEXT: addl %edi, %ecx -; X32-NOSSE-NEXT: movl %ecx, %edi -; X32-NOSSE-NEXT: shrl $4, %edi -; X32-NOSSE-NEXT: addl %ecx, %edi -; X32-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; X32-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101 -; X32-NOSSE-NEXT: shrl $24, %ecx +; X32-NOSSE-NEXT: andl 
%ecx, %edx +; X32-NOSSE-NEXT: addl %esi, %edx +; X32-NOSSE-NEXT: movl %edx, %ecx +; X32-NOSSE-NEXT: shrl $4, %ecx ; X32-NOSSE-NEXT: addl %edx, %ecx -; X32-NOSSE-NEXT: addl %esi, %ecx +; X32-NOSSE-NEXT: andl %ebx, %eax +; X32-NOSSE-NEXT: andl %ebx, %ecx +; X32-NOSSE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %eax +; X32-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %ecx +; X32-NOSSE-NEXT: addl %eax, %ecx +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NOSSE-NEXT: addl %edi, %ecx ; X32-NOSSE-NEXT: xorl %edx, %edx ; X32-NOSSE-NEXT: movl %edx, 12(%eax) ; X32-NOSSE-NEXT: movl %edx, 8(%eax) @@ -860,6 +877,7 @@ ; X32-NOSSE-NEXT: popl %esi ; X32-NOSSE-NEXT: popl %edi ; X32-NOSSE-NEXT: popl %ebx +; X32-NOSSE-NEXT: popl %ebp ; X32-NOSSE-NEXT: retl $4 ; ; X64-LABEL: cnt128_optsize: Index: llvm/trunk/test/CodeGen/X86/pr27202.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr27202.ll +++ llvm/trunk/test/CodeGen/X86/pr27202.ll @@ -4,8 +4,8 @@ define i1 @foo(i32 %i) optsize { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $305419896, %edi # imm = 0x12345678 ; CHECK-NEXT: movl $305419896, %eax # imm = 0x12345678 +; CHECK-NEXT: andl %eax, %edi ; CHECK-NEXT: cmpl %eax, %edi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq @@ -17,8 +17,8 @@ define zeroext i1 @g(i32 %x) optsize { ; CHECK-LABEL: g: ; CHECK: # %bb.0: -; CHECK-NEXT: orl $1, %edi ; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: orl %eax, %edi ; CHECK-NEXT: cmpl %eax, %edi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq