Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -443,6 +443,14 @@
 
     bool matchBEXTRFromAnd(SDNode *Node);
 
+    /// If the high bits of an 'and' operand are known zero, check if setting
+    /// the high bits of an 'and' constant operand can produce a smaller
+    /// encoding by creating a small negative immediate rather than a large
+    /// positive one. This reverses a transform in SimplifyDemandedBits that
+    /// shrinks mask constants by clearing bits. Return 'true' if the node is
+    /// replaced.
+    bool shrinkAndImmediate(SDNode *N);
+
     bool isMaskZeroExtended(SDNode *N) const;
   };
 }
@@ -2429,6 +2437,45 @@
   return true;
 }
 
+bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
+  // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
+  // have immediate operands.
+  MVT VT = And->getSimpleValueType(0);
+  if (VT != MVT::i32 && VT != MVT::i64)
+    return false;
+
+  auto *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
+  if (!Mask)
+    return false;
+
+  // Bail out if the mask constant is already negative; it cannot shrink.
+  APInt MaskVal = Mask->getAPIntValue();
+  unsigned MaskLZ = MaskVal.countLeadingZeros();
+  if (!MaskLZ)
+    return false;
+
+  // The variable operand must have at least as many known-zero high bits as
+  // the mask; otherwise setting the mask's high bits changes the result.
+  SDValue And0 = And->getOperand(0);
+  KnownBits Known;
+  CurDAG->computeKnownBits(And0, Known);
+  if (Known.countMinLeadingZeros() < MaskLZ)
+    return false;
+
+  // Don't bother changing the constant unless it allows a smaller encoding.
+  APInt NegVal = MaskVal | APInt::getHighBitsSet(VT.getSizeInBits(), MaskLZ);
+  unsigned MinWidth = NegVal.getMinSignedBits();
+  if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32))
+    return false;
+
+  SDLoc DL(And);
+  SDValue NewMask = CurDAG->getConstant(NegVal, DL, VT);
+  SDValue NewAnd = CurDAG->getNode(ISD::AND, DL, VT, And0, NewMask);
+  ReplaceNode(And, NewAnd.getNode());
+  SelectCode(NewAnd.getNode());
+  return true;
+}
+
 void X86DAGToDAGISel::Select(SDNode *Node) {
   MVT NVT = Node->getSimpleValueType(0);
   unsigned Opc, MOpc;
@@ -2483,9 +2530,10 @@
   }
   case ISD::AND:
-    // Try to match BEXTR/BEXTRI instruction.
if (matchBEXTRFromAnd(Node)) return; + if (shrinkAndImmediate(Node)) + return; LLVM_FALLTHROUGH; case ISD::OR: Index: test/CodeGen/X86/and-encoding.ll =================================================================== --- test/CodeGen/X86/and-encoding.ll +++ test/CodeGen/X86/and-encoding.ll @@ -47,8 +47,7 @@ ; CHECK-LABEL: lopped32_32to8: ; CHECK: # %bb.0: ; CHECK-NEXT: shrl $4, %edi # encoding: [0xc1,0xef,0x04] -; CHECK-NEXT: andl $268435440, %edi # encoding: [0x81,0xe7,0xf0,0xff,0xff,0x0f] -; CHECK-NEXT: # imm = 0xFFFFFF0 +; CHECK-NEXT: andl $-16, %edi # encoding: [0x83,0xe7,0xf0] ; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] ; CHECK-NEXT: retq # encoding: [0xc3] %shr = lshr i32 %x, 4 @@ -62,8 +61,7 @@ ; CHECK-LABEL: lopped64_32to8: ; CHECK: # %bb.0: ; CHECK-NEXT: shrq $36, %rdi # encoding: [0x48,0xc1,0xef,0x24] -; CHECK-NEXT: andl $268435440, %edi # encoding: [0x81,0xe7,0xf0,0xff,0xff,0x0f] -; CHECK-NEXT: # imm = 0xFFFFFF0 +; CHECK-NEXT: andq $-16, %rdi # encoding: [0x48,0x83,0xe7,0xf0] ; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] ; CHECK-NEXT: retq # encoding: [0xc3] %shr = lshr i64 %x, 36 @@ -77,9 +75,8 @@ ; CHECK-LABEL: lopped64_64to8: ; CHECK: # %bb.0: ; CHECK-NEXT: shrq $4, %rdi # encoding: [0x48,0xc1,0xef,0x04] -; CHECK-NEXT: movabsq $1152921504606846960, %rax # encoding: [0x48,0xb8,0xf0,0xff,0xff,0xff,0xff,0xff,0xff,0x0f] -; CHECK-NEXT: # imm = 0xFFFFFFFFFFFFFF0 -; CHECK-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8] +; CHECK-NEXT: andq $-16, %rdi # encoding: [0x48,0x83,0xe7,0xf0] +; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] ; CHECK-NEXT: retq # encoding: [0xc3] %shr = lshr i64 %x, 4 %and = and i64 %shr, 1152921504606846960 @@ -92,9 +89,9 @@ ; CHECK-LABEL: lopped64_64to32: ; CHECK: # %bb.0: ; CHECK-NEXT: shrq $4, %rdi # encoding: [0x48,0xc1,0xef,0x04] -; CHECK-NEXT: movabsq $1152921504605863920, %rax # encoding: [0x48,0xb8,0xf0,0xff,0xf0,0xff,0xff,0xff,0xff,0x0f] -; CHECK-NEXT: # imm = 0xFFFFFFFFFF0FFF0 -; CHECK-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8] +; CHECK-NEXT: andq $-983056, %rdi # encoding: [0x48,0x81,0xe7,0xf0,0xff,0xf0,0xff] +; CHECK-NEXT: # imm = 0xFFF0FFF0 +; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] ; CHECK-NEXT: retq # encoding: [0xc3] %shr = lshr i64 %x, 4 %and = and i64 %shr, 1152921504605863920 @@ -112,8 +109,7 @@ ; CHECK-NEXT: # imm = 0xF0F0F0F1 ; CHECK-NEXT: imulq %rcx, %rax # encoding: [0x48,0x0f,0xaf,0xc1] ; CHECK-NEXT: shrq $36, %rax # encoding: [0x48,0xc1,0xe8,0x24] -; CHECK-NEXT: andl $268435328, %eax # encoding: [0x25,0x80,0xff,0xff,0x0f] -; CHECK-NEXT: # imm = 0xFFFFF80 +; CHECK-NEXT: andl $-128, %eax # encoding: [0x83,0xe0,0x80] ; CHECK-NEXT: # kill: def %eax killed %eax killed %rax ; CHECK-NEXT: retq # encoding: [0xc3] %div = udiv i32 %x, 17 Index: test/CodeGen/X86/divide-by-constant.ll =================================================================== --- test/CodeGen/X86/divide-by-constant.ll +++ test/CodeGen/X86/divide-by-constant.ll @@ -49,7 +49,6 @@ ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: imull $171, %eax, %eax ; X32-NEXT: shrl $9, %eax -; X32-NEXT: movzwl %ax, %eax ; X32-NEXT: # kill: def %al killed %al killed %eax ; X32-NEXT: retl ; @@ -57,7 +56,6 @@ ; X64: # %bb.0: # %entry ; X64-NEXT: imull $171, %esi, %eax ; X64-NEXT: shrl $9, %eax -; X64-NEXT: movzwl %ax, %eax ; X64-NEXT: # kill: def %al killed %al killed %eax ; X64-NEXT: retq entry: @@ -168,7 +166,6 @@ ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: imull $211, %eax, %eax ; X32-NEXT: shrl $13, %eax -; X32-NEXT: 
movzwl %ax, %eax ; X32-NEXT: # kill: def %al killed %al killed %eax ; X32-NEXT: retl ; @@ -178,7 +175,6 @@ ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: imull $211, %eax, %eax ; X64-NEXT: shrl $13, %eax -; X64-NEXT: movzwl %ax, %eax ; X64-NEXT: # kill: def %al killed %al killed %eax ; X64-NEXT: retq %div = udiv i8 %x, 78 @@ -193,7 +189,6 @@ ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: imull $71, %eax, %eax ; X32-NEXT: shrl $11, %eax -; X32-NEXT: movzwl %ax, %eax ; X32-NEXT: # kill: def %al killed %al killed %eax ; X32-NEXT: retl ; @@ -203,7 +198,6 @@ ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: imull $71, %eax, %eax ; X64-NEXT: shrl $11, %eax -; X64-NEXT: movzwl %ax, %eax ; X64-NEXT: # kill: def %al killed %al killed %eax ; X64-NEXT: retq %div = udiv i8 %x, 116 Index: test/CodeGen/X86/known-bits.ll =================================================================== --- test/CodeGen/X86/known-bits.ll +++ test/CodeGen/X86/known-bits.ll @@ -13,7 +13,6 @@ ; X32-NEXT: movzbl (%eax), %eax ; X32-NEXT: imull $101, %eax, %eax ; X32-NEXT: shrl $14, %eax -; X32-NEXT: movzwl %ax, %eax ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: vmovd %eax, %xmm0 ; X32-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -51,7 +50,6 @@ ; X64-NEXT: movzbl (%rdi), %eax ; X64-NEXT: imull $101, %eax, %eax ; X64-NEXT: shrl $14, %eax -; X64-NEXT: movzwl %ax, %eax ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: vmovd %eax, %xmm0 ; X64-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero Index: test/CodeGen/X86/popcnt.ll =================================================================== --- test/CodeGen/X86/popcnt.ll +++ test/CodeGen/X86/popcnt.ll @@ -71,7 +71,7 @@ ; X32-NEXT: andl $13107, %eax # imm = 0x3333 ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andl $32752, %ecx # imm = 0x7FF0 +; X32-NEXT: andl $-16, %ecx ; X32-NEXT: shrl $4, %ecx ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: andl $3855, %ecx # imm = 0xF0F @@ -94,7 +94,7 @@ ; X64-NEXT: andl $13107, %edi # imm = 0x3333 ; X64-NEXT: addl %eax, %edi ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $32752, %eax # imm = 0x7FF0 +; X64-NEXT: andl $-16, %eax ; X64-NEXT: shrl $4, %eax ; X64-NEXT: addl %edi, %eax ; X64-NEXT: andl $3855, %eax # imm = 0xF0F Index: test/CodeGen/X86/pr33844.ll =================================================================== --- test/CodeGen/X86/pr33844.ll +++ test/CodeGen/X86/pr33844.ll @@ -15,7 +15,7 @@ ; CHECK-NEXT: shrl $31, %ecx ; CHECK-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF ; CHECK-NEXT: shrl $31, %ecx -; CHECK-NEXT: andl $62, %ecx +; CHECK-NEXT: andl $-2, %ecx ; CHECK-NEXT: andl $-536870912, %eax # imm = 0xE0000000 ; CHECK-NEXT: orl %ecx, %eax ; CHECK-NEXT: movl %eax, {{.*}}(%rip) Index: test/CodeGen/X86/shift-pair.ll =================================================================== --- test/CodeGen/X86/shift-pair.ll +++ test/CodeGen/X86/shift-pair.ll @@ -5,7 +5,7 @@ ; CHECK-LABEL: test: ; CHECK: # %bb.0: ; CHECK-NEXT: shrq $54, %rdi -; CHECK-NEXT: andl $1020, %edi # imm = 0x3FC +; CHECK-NEXT: andq $-4, %rdi ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: retq %B = lshr i64 %A, 56 Index: test/CodeGen/X86/urem-i8-constant.ll =================================================================== --- test/CodeGen/X86/urem-i8-constant.ll +++ test/CodeGen/X86/urem-i8-constant.ll @@ -9,7 +9,6 @@ ; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: imull $111, %ecx, %eax ; CHECK-NEXT: shrl $12, %eax -; CHECK-NEXT: movzwl 
%ax, %eax ; CHECK-NEXT: movb $37, %dl ; CHECK-NEXT: # kill: def %al killed %al killed %eax ; CHECK-NEXT: mulb %dl Index: test/CodeGen/X86/vector-sext.ll =================================================================== --- test/CodeGen/X86/vector-sext.ll +++ test/CodeGen/X86/vector-sext.ll @@ -2190,23 +2190,13 @@ ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movzbl (%rdi), %eax ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $3, %ecx -; SSE2-NEXT: andl $1, %ecx +; SSE2-NEXT: shrl $7, %ecx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $2, %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: movd %ecx, %xmm2 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: movl %eax, %ecx +; SSE2-NEXT: shrl $6, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm1 -; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSE2-NEXT: movl %eax, %ecx ; SSE2-NEXT: shrl $5, %ecx ; SSE2-NEXT: andl $1, %ecx @@ -2216,15 +2206,24 @@ ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm2 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $6, %ecx +; SSE2-NEXT: shrl $3, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm0 -; SSE2-NEXT: shrl $7, %eax -; SSE2-NEXT: movzwl %ax, %eax -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; SSE2-NEXT: movl %eax, %ecx +; SSE2-NEXT: shrl $2, %ecx +; SSE2-NEXT: andl $1, %ecx +; SSE2-NEXT: movd %ecx, %xmm3 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] +; SSE2-NEXT: movl %eax, %ecx +; SSE2-NEXT: andl $1, %ecx +; SSE2-NEXT: movd %ecx, %xmm1 +; SSE2-NEXT: shrl %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] @@ -2239,23 +2238,13 @@ ; SSSE3: # %bb.0: # %entry ; SSSE3-NEXT: movzbl (%rdi), %eax ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $3, %ecx -; SSSE3-NEXT: andl $1, %ecx +; SSSE3-NEXT: shrl $7, %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $2, %ecx -; SSSE3-NEXT: andl $1, %ecx -; SSSE3-NEXT: movd %ecx, %xmm2 -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSSE3-NEXT: movl %eax, %ecx +; SSSE3-NEXT: shrl $6, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm1 -; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl %ecx -; SSSE3-NEXT: andl $1, %ecx -; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSSE3-NEXT: movl %eax, %ecx ; SSSE3-NEXT: shrl $5, %ecx ; SSSE3-NEXT: 
andl $1, %ecx @@ -2265,15 +2254,24 @@ ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm2 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $6, %ecx +; SSSE3-NEXT: shrl $3, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 -; SSSE3-NEXT: shrl $7, %eax -; SSSE3-NEXT: movzwl %ax, %eax -; SSSE3-NEXT: movd %eax, %xmm3 -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; SSSE3-NEXT: movl %eax, %ecx +; SSSE3-NEXT: shrl $2, %ecx +; SSSE3-NEXT: andl $1, %ecx +; SSSE3-NEXT: movd %ecx, %xmm3 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] +; SSSE3-NEXT: movl %eax, %ecx +; SSSE3-NEXT: andl $1, %ecx +; SSSE3-NEXT: movd %ecx, %xmm1 +; SSSE3-NEXT: shrl %eax +; SSSE3-NEXT: andl $1, %eax +; SSSE3-NEXT: movd %eax, %xmm0 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] @@ -2315,7 +2313,6 @@ ; SSE41-NEXT: andl $1, %ecx ; SSE41-NEXT: pinsrw $6, %ecx, %xmm1 ; SSE41-NEXT: shrl $7, %eax -; SSE41-NEXT: movzwl %ax, %eax ; SSE41-NEXT: pinsrw $7, %eax, %xmm1 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero ; SSE41-NEXT: pslld $31, %xmm0 @@ -3001,52 +2998,32 @@ ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movzwl (%rdi), %eax ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $7, %ecx -; SSE2-NEXT: andl $1, %ecx +; SSE2-NEXT: shrl $15, %ecx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $6, %ecx +; SSE2-NEXT: shrl $14, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm1 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $5, %ecx +; SSE2-NEXT: shrl $13, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $4, %ecx +; SSE2-NEXT: shrl $12, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm2 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $3, %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: movd %ecx, %xmm0 -; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $2, %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: movd %ecx, %xmm3 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: movd %ecx, %xmm1 -; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: movd %ecx, %xmm0 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; SSE2-NEXT: movl %eax, %ecx ; SSE2-NEXT: shrl $11, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: movl %eax, %ecx ; SSE2-NEXT: shrl $10, %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: movd %ecx, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE2-NEXT: movd %ecx, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE2-NEXT: movl %eax, %ecx ; SSE2-NEXT: shrl $9, %ecx ; SSE2-NEXT: andl $1, %ecx @@ -3056,26 +3033,45 @@ ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $13, %ecx +; SSE2-NEXT: shrl $7, %ecx +; SSE2-NEXT: andl $1, %ecx +; SSE2-NEXT: movd %ecx, %xmm1 +; SSE2-NEXT: movl %eax, %ecx +; SSE2-NEXT: shrl $6, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $12, %ecx +; SSE2-NEXT: shrl $5, %ecx +; SSE2-NEXT: andl $1, %ecx +; SSE2-NEXT: movd %ecx, %xmm1 +; SSE2-NEXT: movl %eax, %ecx +; SSE2-NEXT: shrl $4, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm3 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $14, %ecx +; SSE2-NEXT: shrl $3, %ecx +; SSE2-NEXT: andl $1, %ecx +; SSE2-NEXT: movd %ecx, %xmm1 +; SSE2-NEXT: movl %eax, %ecx +; SSE2-NEXT: shrl $2, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm2 -; SSE2-NEXT: shrl $15, %eax -; SSE2-NEXT: movzwl %ax, %eax +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE2-NEXT: movl %eax, %ecx +; SSE2-NEXT: andl $1, %ecx +; SSE2-NEXT: movd %ecx, %xmm1 +; SSE2-NEXT: shrl %eax +; SSE2-NEXT: andl $1, %eax ; SSE2-NEXT: movd %eax, %xmm4 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = 
xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] @@ -3090,52 +3086,32 @@ ; SSSE3: # %bb.0: # %entry ; SSSE3-NEXT: movzwl (%rdi), %eax ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $7, %ecx -; SSSE3-NEXT: andl $1, %ecx +; SSSE3-NEXT: shrl $15, %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $6, %ecx +; SSSE3-NEXT: shrl $14, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm1 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $5, %ecx +; SSSE3-NEXT: shrl $13, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $4, %ecx +; SSSE3-NEXT: shrl $12, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm2 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $3, %ecx -; SSSE3-NEXT: andl $1, %ecx -; SSSE3-NEXT: movd %ecx, %xmm0 -; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $2, %ecx -; SSSE3-NEXT: andl $1, %ecx -; SSSE3-NEXT: movd %ecx, %xmm3 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: andl $1, %ecx -; SSSE3-NEXT: movd %ecx, %xmm1 -; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl %ecx -; SSSE3-NEXT: andl $1, %ecx -; SSSE3-NEXT: movd %ecx, %xmm0 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] -; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; SSSE3-NEXT: movl %eax, %ecx ; SSSE3-NEXT: shrl $11, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: movl %eax, %ecx ; SSSE3-NEXT: shrl $10, %ecx ; SSSE3-NEXT: andl $1, %ecx -; SSSE3-NEXT: movd %ecx, %xmm2 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSSE3-NEXT: movd %ecx, %xmm1 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSSE3-NEXT: movl %eax, %ecx ; SSSE3-NEXT: shrl $9, %ecx ; SSSE3-NEXT: andl $1, %ecx @@ -3145,26 +3121,45 @@ ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $13, %ecx +; SSSE3-NEXT: shrl $7, %ecx +; SSSE3-NEXT: andl $1, %ecx +; SSSE3-NEXT: movd %ecx, %xmm1 +; SSSE3-NEXT: movl %eax, %ecx +; SSSE3-NEXT: shrl $6, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm2 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $12, %ecx +; SSSE3-NEXT: shrl $5, %ecx +; SSSE3-NEXT: andl $1, %ecx +; SSSE3-NEXT: movd %ecx, %xmm1 +; SSSE3-NEXT: movl %eax, %ecx +; SSSE3-NEXT: shrl $4, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm3 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $14, %ecx +; SSSE3-NEXT: shrl $3, %ecx +; SSSE3-NEXT: andl $1, %ecx +; SSSE3-NEXT: movd %ecx, %xmm1 +; SSSE3-NEXT: movl %eax, %ecx +; SSSE3-NEXT: shrl $2, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm2 -; SSSE3-NEXT: shrl $15, %eax -; SSSE3-NEXT: movzwl %ax, %eax +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSSE3-NEXT: movl %eax, %ecx +; SSSE3-NEXT: andl $1, %ecx +; SSSE3-NEXT: movd %ecx, %xmm1 +; SSSE3-NEXT: shrl %eax +; SSSE3-NEXT: andl $1, %eax ; SSSE3-NEXT: movd %eax, %xmm4 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] @@ -3238,7 +3233,6 @@ ; SSE41-NEXT: andl $1, %ecx ; SSE41-NEXT: pinsrb $14, %ecx, %xmm1 ; SSE41-NEXT: shrl $15, %eax -; SSE41-NEXT: movzwl %ax, %eax ; SSE41-NEXT: pinsrb $15, %eax, %xmm1 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; SSE41-NEXT: psllw $15, %xmm0 Index: test/CodeGen/X86/win64_frame.ll 
=================================================================== --- test/CodeGen/X86/win64_frame.ll +++ test/CodeGen/X86/win64_frame.ll @@ -177,9 +177,8 @@ ; ALL-NEXT: movq %rsp, %rbx ; ALL-NEXT: movl 288(%rbp), %esi ; ALL-NEXT: movl %ecx, %eax -; ALL-NEXT: leaq 15(,%rax,4), %rcx -; ALL-NEXT: movabsq $34359738352, %rax # imm = 0x7FFFFFFF0 -; ALL-NEXT: andq %rcx, %rax +; ALL-NEXT: leaq 15(,%rax,4), %rax +; ALL-NEXT: andq $-16, %rax ; ALL-NEXT: callq __chkstk ; ALL-NEXT: subq %rax, %rsp ; ALL-NEXT: subq $32, %rsp Index: test/CodeGen/X86/x86-64-baseptr.ll =================================================================== --- test/CodeGen/X86/x86-64-baseptr.ll +++ test/CodeGen/X86/x86-64-baseptr.ll @@ -21,16 +21,15 @@ ; CHECK-NEXT: subq $32, %rsp ; CHECK-NEXT: movq %rsp, %rbx ; CHECK-NEXT: callq helper +; CHECK-NEXT: movq %rsp, %rcx ; CHECK-NEXT: movl %eax, %eax ; CHECK-NEXT: leaq 31(,%rax,4), %rax -; CHECK-NEXT: movabsq $34359738336, %rcx # imm = 0x7FFFFFFE0 -; CHECK-NEXT: andq %rax, %rcx -; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq %rax, %rdx -; CHECK-NEXT: subq %rcx, %rdx -; CHECK-NEXT: negq %rcx +; CHECK-NEXT: andq $-32, %rax +; CHECK-NEXT: movq %rcx, %rdx +; CHECK-NEXT: subq %rax, %rdx ; CHECK-NEXT: movq %rdx, %rsp -; CHECK-NEXT: movl $0, (%rax,%rcx) +; CHECK-NEXT: negq %rax +; CHECK-NEXT: movl $0, (%rcx,%rax) ; CHECK-NEXT: leaq -8(%rbp), %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %rbp Index: test/CodeGen/X86/zext-fold.ll =================================================================== --- test/CodeGen/X86/zext-fold.ll +++ test/CodeGen/X86/zext-fold.ll @@ -6,7 +6,7 @@ ; CHECK-LABEL: test1: ; CHECK: # %bb.0: ; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: andl $224, %eax +; CHECK-NEXT: andl $-32, %eax ; CHECK-NEXT: retl %A = and i8 %x, -32 %B = zext i8 %A to i32 @@ -19,7 +19,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: andl $224, %ecx +; CHECK-NEXT: andl $-32, %ecx ; CHECK-NEXT: orl $63, %eax ; CHECK-NEXT: addl %ecx, %eax ; CHECK-NEXT: retl @@ -41,7 +41,7 @@ ; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: pushl %eax -; CHECK-NEXT: andl $224, %eax +; CHECK-NEXT: andl $-32, %eax ; CHECK-NEXT: pushl %eax ; CHECK-NEXT: calll use ; CHECK-NEXT: addl $28, %esp
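
A minimal standalone sketch of the constant arithmetic behind shrinkAndImmediate, not part of the patch, may help when reading the CHECK-line changes above. It replays the lopped32_32to8 case from and-encoding.ll: after 'shrl $4' the top 4 bits of the operand are known zero, so the 0x0FFFFFF0 mask may legally grow into those bits, becomes -16, and encodes as a sign-extended 8-bit immediate. The helper names (leadingZeros, signExtend, minSignedBits, shrinkMask) are illustrative stand-ins for the APInt/KnownBits queries used in the real code, not LLVM APIs.

#include <cstdint>
#include <cstdio>

// Count leading zero bits of V within a Bits-wide value.
static unsigned leadingZeros(uint64_t V, unsigned Bits) {
  unsigned LZ = 0;
  for (unsigned I = Bits; I-- > 0 && !((V >> I) & 1);)
    ++LZ;
  return LZ;
}

// Sign-extend the low Bits bits of V to 64 bits (1 <= Bits <= 64).
static int64_t signExtend(uint64_t V, unsigned Bits) {
  return (int64_t)(V << (64 - Bits)) >> (64 - Bits);
}

// Smallest two's-complement width that can represent V.
static unsigned minSignedBits(int64_t V) {
  unsigned Bits = 1;
  while (signExtend((uint64_t)V, Bits) != V)
    ++Bits;
  return Bits;
}

// If the variable operand of the 'and' has at least as many known-zero high
// bits as the mask has leading zeros, those mask bits can be set without
// changing the result; report the widened (negative) mask when that yields a
// smaller immediate encoding, mirroring the checks in shrinkAndImmediate.
static bool shrinkMask(uint64_t Mask, unsigned KnownZeroHighBits,
                       unsigned Bits, int64_t &NewMask) {
  unsigned MaskLZ = leadingZeros(Mask, Bits);
  // Already negative, all-zero mask, or the high bits are not known zero.
  if (MaskLZ == 0 || MaskLZ == Bits || KnownZeroHighBits < MaskLZ)
    return false;

  uint64_t AllOnes = Bits == 64 ? ~0ULL : ((1ULL << Bits) - 1);
  uint64_t HighBits = AllOnes & ~(AllOnes >> MaskLZ);
  int64_t NegVal = signExtend(Mask | HighBits, Bits);

  // Only worthwhile if the negative value fits an imm8, or fits an imm32
  // when the original mask did not.
  unsigned MinWidth = minSignedBits(NegVal);
  if (MinWidth > 32 ||
      (MinWidth > 8 && minSignedBits(signExtend(Mask, Bits)) <= 32))
    return false;

  NewMask = NegVal;
  return true;
}

int main() {
  // lopped32_32to8 from and-encoding.ll: (x >> 4) & 0x0FFFFFF0. The shift
  // guarantees 4 known-zero high bits, so the mask widens to -16 (imm8).
  int64_t NewMask;
  if (shrinkMask(0x0FFFFFF0, /*KnownZeroHighBits=*/4, /*Bits=*/32, NewMask))
    printf("0x0FFFFFF0 -> %lld (fits imm8: %s)\n", (long long)NewMask,
           minSignedBits(NewMask) <= 8 ? "yes" : "no");
  // Expected output: 0x0FFFFFF0 -> -16 (fits imm8: yes), which matches the
  // new 'andl $-16, %edi' CHECK line above.
  return 0;
}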