Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42781,11 +42781,15 @@
   if (Opc == ISD::SHL && VT.isVector() && VT.getVectorElementType() == MVT::i8)
     return false;

-  // 8-bit multiply is probably not much cheaper than 32-bit multiply, and
-  // we have specializations to turn 32-bit multiply into LEA or other ops.
+  // TODO: Almost no 8-bit ops are desirable because they have no actual
+  // size/speed advantages vs. 32-bit ops, but they do have a major
+  // potential disadvantage by causing partial register stalls.
+  //
+  // 8-bit multiply/shl is probably not cheaper than 32-bit multiply/shl, and
+  // we have specializations to turn 32-bit multiply/shl into LEA or other ops.
   // Also, see the comment in "IsDesirableToPromoteOp" - where we additionally
   // check for a constant operand to the multiply.
-  if (Opc == ISD::MUL && VT == MVT::i8)
+  if ((Opc == ISD::MUL || Opc == ISD::SHL) && VT == MVT::i8)
     return false;

   // i16 instruction encodings are longer and some i16 instructions are slow,
Index: llvm/test/CodeGen/X86/bt.ll
===================================================================
--- llvm/test/CodeGen/X86/bt.ll
+++ llvm/test/CodeGen/X86/bt.ll
@@ -1150,19 +1150,18 @@
 define zeroext i1 @demanded_with_known_zeroes(i32 %bit, i32 %bits) {
 ; X86-LABEL: demanded_with_known_zeroes:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT:    shlb $2, %cl
-; X86-NEXT:    movzbl %cl, %ecx
-; X86-NEXT:    btl %ecx, %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    shlb $2, %al
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl %al, %eax
+; X86-NEXT:    btl %eax, %ecx
 ; X86-NEXT:    setb %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: demanded_with_known_zeroes:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    shlb $2, %dil
-; X64-NEXT:    movzbl %dil, %eax
-; X64-NEXT:    btl %eax, %esi
+; X64-NEXT:    shll $2, %edi
+; X64-NEXT:    btl %edi, %esi
 ; X64-NEXT:    setb %al
 ; X64-NEXT:    retq
 entry:
Index: llvm/test/CodeGen/X86/btc_bts_btr.ll
===================================================================
--- llvm/test/CodeGen/X86/btc_bts_btr.ll
+++ llvm/test/CodeGen/X86/btc_bts_btr.ll
@@ -954,15 +954,15 @@
 ; X64-LABEL: btr_32_mask_zeros:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shlb $2, %sil
+; X64-NEXT:    shll $2, %esi
 ; X64-NEXT:    btrl %esi, %eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: btr_32_mask_zeros:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shlb $2, %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    btrl %ecx, %eax
 ; X86-NEXT:    retl
   %1 = shl i32 %n, 2
@@ -977,15 +977,15 @@
 ; X64-LABEL: bts_32_mask_zeros:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shlb $2, %sil
+; X64-NEXT:    shll $2, %esi
 ; X64-NEXT:    btsl %esi, %eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: bts_32_mask_zeros:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shlb $2, %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    btsl %ecx, %eax
 ; X86-NEXT:    retl
   %1 = shl i32 %n, 2
@@ -999,15 +999,15 @@
 ; X64-LABEL: btc_32_mask_zeros:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shlb $2, %sil
+; X64-NEXT:    shll $2, %esi
 ; X64-NEXT:    btcl %esi, %eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: btc_32_mask_zeros:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shlb $2, %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    btcl %ecx, %eax
 ; X86-NEXT:    retl
   %1 = shl i32 %n, 2
@@ -1021,14 +1021,14 @@
 ; X64-LABEL: btr_64_mask_zeros:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shlb $2, %sil
+; X64-NEXT:    shlq $2, %rsi
 ; X64-NEXT:    btrq %rsi, %rax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: btr_64_mask_zeros:
 ; X86:       # %bb.0:
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT:    shlb $2, %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shll $2, %ecx
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    shldl %cl, %eax, %edx
@@ -1056,14 +1056,14 @@
 ; X64-LABEL: bts_64_mask_zeros:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shlb $2, %sil
+; X64-NEXT:    shlq $2, %rsi
 ; X64-NEXT:    btsq %rsi, %rax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: bts_64_mask_zeros:
 ; X86:       # %bb.0:
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT:    shlb $2, %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shll $2, %ecx
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    shldl %cl, %eax, %edx
@@ -1088,14 +1088,14 @@
 ; X64-LABEL: btc_64_mask_zeros:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shlb $2, %sil
+; X64-NEXT:    shlq $2, %rsi
 ; X64-NEXT:    btcq %rsi, %rax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: btc_64_mask_zeros:
 ; X86:       # %bb.0:
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT:    shlb $2, %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shll $2, %ecx
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    shldl %cl, %eax, %edx
Index: llvm/test/CodeGen/X86/rotate4.ll
===================================================================
--- llvm/test/CodeGen/X86/rotate4.ll
+++ llvm/test/CodeGen/X86/rotate4.ll
@@ -633,9 +633,9 @@
 define i32 @rotate_demanded_bits_3(i32, i32) {
 ; X86-LABEL: rotate_demanded_bits_3:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    addb %cl, %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    roll %cl, %eax
 ; X86-NEXT:    retl
 ;
Index: llvm/test/CodeGen/X86/scheduler-backtracking.ll
===================================================================
--- llvm/test/CodeGen/X86/scheduler-backtracking.ll
+++ llvm/test/CodeGen/X86/scheduler-backtracking.ll
@@ -16,7 +16,7 @@
 ; ILP-NEXT:    pushq %rbx
 ; ILP-NEXT:    movq %rdi, %rax
 ; ILP-NEXT:    xorl %r8d, %r8d
-; ILP-NEXT:    addb %sil, %sil
+; ILP-NEXT:    addq %rsi, %rsi
 ; ILP-NEXT:    addb $2, %sil
 ; ILP-NEXT:    orb $1, %sil
 ; ILP-NEXT:    movl $1, %r10d
@@ -61,7 +61,7 @@
 ; HYBRID-LABEL: test1:
 ; HYBRID:       # %bb.0:
 ; HYBRID-NEXT:    movq %rdi, %rax
-; HYBRID-NEXT:    addb %sil, %sil
+; HYBRID-NEXT:    addq %rsi, %rsi
 ; HYBRID-NEXT:    addb $2, %sil
 ; HYBRID-NEXT:    orb $1, %sil
 ; HYBRID-NEXT:    movb $-128, %cl
@@ -104,7 +104,7 @@
 ; BURR-LABEL: test1:
 ; BURR:       # %bb.0:
 ; BURR-NEXT:    movq %rdi, %rax
-; BURR-NEXT:    addb %sil, %sil
+; BURR-NEXT:    addq %rsi, %rsi
 ; BURR-NEXT:    addb $2, %sil
 ; BURR-NEXT:    orb $1, %sil
 ; BURR-NEXT:    movb $-128, %cl
@@ -148,7 +148,7 @@
 ; SRC:       # %bb.0:
 ; SRC-NEXT:    pushq %rbx
 ; SRC-NEXT:    movq %rdi, %rax
-; SRC-NEXT:    addb %sil, %sil
+; SRC-NEXT:    addq %rsi, %rsi
 ; SRC-NEXT:    addb $2, %sil
 ; SRC-NEXT:    orb $1, %sil
 ; SRC-NEXT:    movb $-128, %cl
@@ -195,7 +195,7 @@
 ; LIN-NEXT:    movq %rdi, %rax
 ; LIN-NEXT:    xorl %r9d, %r9d
 ; LIN-NEXT:    movl $1, %r8d
-; LIN-NEXT:    addb %sil, %sil
+; LIN-NEXT:    addq %rsi, %rsi
 ; LIN-NEXT:    addb $2, %sil
 ; LIN-NEXT:    orb $1, %sil
 ; LIN-NEXT:    movl $1, %edx
Index: llvm/test/CodeGen/X86/select_const.ll
===================================================================
--- llvm/test/CodeGen/X86/select_const.ll
+++ llvm/test/CodeGen/X86/select_const.ll
@@ -353,9 +353,9 @@
 define i32 @select_pow2_diff_neg(i1 zeroext %cond) {
 ; CHECK-LABEL: select_pow2_diff_neg:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shlb $4, %dil
-; CHECK-NEXT:    movzbl %dil, %eax
-; CHECK-NEXT:    orl $-25, %eax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    shll $4, %edi
+; CHECK-NEXT:    leal -25(%rdi), %eax
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, i32 -9, i32 -25
   ret i32 %sel
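
A minimal IR sketch of the pattern this change targets (the function and value names below are invented for illustration and are not taken from the patch): an i8 left-shift by a constant whose result is only used after being widened. With i8 no longer reported as a desirable type for ISD::SHL, DAG type promotion widens such shifts to i32, and the updated checks above (e.g. @demanded_with_known_zeroes and the *_mask_zeros tests) show the effect: a single shll/shlq on the full register instead of an 8-bit shlb plus, where one was needed, a movzbl zero-extension.

  define i32 @shl_i8_bit_index(i8 %n, i32 %x) {
    %idx8 = shl i8 %n, 2                ; 8-bit shift by a constant
    %idx = zext i8 %idx8 to i32         ; result is widened before use
    %shifted = lshr i32 %x, %idx        ; used as a 32-bit shift amount
    %bit = and i32 %shifted, 1
    ret i32 %bit
  }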