diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3995,17 +3995,36 @@ // so we are not afraid that we might mess up BZHI/BEXTR pattern. SDValue NewShiftAmt; - if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { + if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB || + ShiftAmt->getOpcode() == ISD::XOR) { SDValue Add0 = ShiftAmt->getOperand(0); SDValue Add1 = ShiftAmt->getOperand(1); auto *Add0C = dyn_cast(Add0); auto *Add1C = dyn_cast(Add1); - // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X - // to avoid the ADD/SUB. + // If we are shifting by X+/-/^N where N == 0 mod Size, then just shift by X + // to avoid the ADD/SUB/XOR. if (Add1C && Add1C->getAPIntValue().urem(Size) == 0) { NewShiftAmt = Add0; - // If we are shifting by N-X where N == 0 mod Size, then just shift by -X - // to generate a NEG instead of a SUB of a constant. + + } else if (ShiftAmt->getOpcode() != ISD::ADD && + ((Add0C && Add0C->getAPIntValue().urem(Size) == Size - 1) || + (Add1C && Add1C->getAPIntValue().urem(Size) == Size - 1))) { + // If we are doing a NOT on just the lower bits with (Size*N-1) -/^ X + // we can replace it with a NOT. In the XOR case it may save some code + // size, in the SUB case it also may save a move. + assert(Add0C == nullptr || Add1C == nullptr); + + // We can only do N-X, not X-N + if (ShiftAmt->getOpcode() == ISD::SUB && Add0C == nullptr) + return false; + + auto *ConstValOp = Add0C == nullptr ? Add1C : Add0C; + EVT OpVT = ShiftAmt.getValueType(); + + NewShiftAmt = CurDAG->getNOT(DL, Add0C == nullptr ? Add0 : Add1, OpVT); + insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt); + // If we are shifting by N-X where N == 0 mod Size, then just shift by + // -X to generate a NEG instead of a SUB of a constant. } else if (ShiftAmt->getOpcode() == ISD::SUB && Add0C && Add0C->getZExtValue() != 0) { EVT SubVT = ShiftAmt.getValueType(); diff --git a/llvm/test/CodeGen/X86/legalize-shift-64.ll b/llvm/test/CodeGen/X86/legalize-shift-64.ll --- a/llvm/test/CodeGen/X86/legalize-shift-64.ll +++ b/llvm/test/CodeGen/X86/legalize-shift-64.ll @@ -10,7 +10,7 @@ ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: shll %cl, %eax ; CHECK-NEXT: shrl %edx -; CHECK-NEXT: xorb $31, %cl +; CHECK-NEXT: notb %cl ; CHECK-NEXT: shrl %cl, %edx ; CHECK-NEXT: retl %conv = zext i32 %xx to i64 diff --git a/llvm/test/CodeGen/X86/not-shift.ll b/llvm/test/CodeGen/X86/not-shift.ll --- a/llvm/test/CodeGen/X86/not-shift.ll +++ b/llvm/test/CodeGen/X86/not-shift.ll @@ -50,17 +50,17 @@ ; ; X64-NOBMI2-LABEL: sub63_shiftl64: ; X64-NOBMI2: # %bb.0: +; X64-NOBMI2-NEXT: movq %rsi, %rcx ; X64-NOBMI2-NEXT: movq %rdi, %rax -; X64-NOBMI2-NEXT: movb $63, %cl -; X64-NOBMI2-NEXT: subb %sil, %cl +; X64-NOBMI2-NEXT: notb %cl +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: sub63_shiftl64: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movb $63, %al -; X64-BMI2-NEXT: subb %sil, %al -; X64-BMI2-NEXT: shlxq %rax, %rdi, %rax +; X64-BMI2-NEXT: notb %sil +; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax ; X64-BMI2-NEXT: retq %adjcnt = sub i64 63, %cnt %result = shl i64 %val, %adjcnt @@ -107,14 +107,14 @@ ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movq %rsi, %rcx ; X64-NOBMI2-NEXT: movq %rdi, %rax -; X64-NOBMI2-NEXT: xorb $63, %cl +; X64-NOBMI2-NEXT: notb %cl ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shrq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: xor63_shiftr64: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: xorb $63, %sil +; X64-BMI2-NEXT: notb %sil ; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax ; X64-BMI2-NEXT: retq %adjcnt = xor i64 %cnt, 63 @@ -162,14 +162,14 @@ ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movq %rsi, %rcx ; X64-NOBMI2-NEXT: movq %rdi, %rax -; X64-NOBMI2-NEXT: xorb $127, %cl +; X64-NOBMI2-NEXT: notb %cl ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: sub127_shiftl64: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: xorb $127, %sil +; X64-BMI2-NEXT: notb %sil ; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax ; X64-BMI2-NEXT: retq %adjcnt = sub i64 127, %cnt @@ -217,14 +217,14 @@ ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movq %rsi, %rcx ; X64-NOBMI2-NEXT: movq %rdi, %rax -; X64-NOBMI2-NEXT: xorb $127, %cl +; X64-NOBMI2-NEXT: notb %cl ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shrq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: xor127_shiftr64: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: xorb $127, %sil +; X64-BMI2-NEXT: notb %sil ; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax ; X64-BMI2-NEXT: retq %adjcnt = xor i64 %cnt, 127 @@ -272,14 +272,12 @@ ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movq %rsi, %rcx ; X64-NOBMI2-NEXT: movq %rdi, %rax -; X64-NOBMI2-NEXT: xorb $64, %cl ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: xor64_shiftl64: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: xorb $64, %sil ; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax ; X64-BMI2-NEXT: retq %adjcnt = xor i64 %cnt, 64 @@ -401,31 +399,31 @@ ; X86-NOBMI2-LABEL: sub31_shiftr32: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI2-NEXT: movb $31, %cl -; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: notb %cl ; X86-NOBMI2-NEXT: shrl %cl, %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: sub31_shiftr32: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movb $31, %al -; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: notb %al ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: sub31_shiftr32: ; X64-NOBMI2: # %bb.0: +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: movl %edi, %eax -; X64-NOBMI2-NEXT: movb $31, %cl -; X64-NOBMI2-NEXT: subb %sil, %cl +; X64-NOBMI2-NEXT: notb %cl +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shrl %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: sub31_shiftr32: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movb $31, %al -; X64-BMI2-NEXT: subb %sil, %al -; X64-BMI2-NEXT: shrxl %eax, %edi, %eax +; X64-BMI2-NEXT: notb %sil +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax ; X64-BMI2-NEXT: retq %adjcnt = sub i32 31, %cnt %result = lshr i32 %val, %adjcnt @@ -437,14 +435,14 @@ ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: xorb $31, %cl +; X86-NOBMI2-NEXT: notb %cl ; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: xor31_shiftl32: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: xorb $31, %al +; X86-BMI2-NEXT: notb %al ; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -452,14 +450,14 @@ ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: movl %edi, %eax -; X64-NOBMI2-NEXT: xorb $31, %cl +; X64-NOBMI2-NEXT: notb %cl ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: xor31_shiftl32: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: xorb $31, %sil +; X64-BMI2-NEXT: notb %sil ; X64-BMI2-NEXT: shlxl %esi, %edi, %eax ; X64-BMI2-NEXT: retq %adjcnt = xor i32 %cnt, 31 @@ -471,31 +469,31 @@ ; X86-NOBMI2-LABEL: sub63_shiftr32: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI2-NEXT: movb $63, %cl -; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: notb %cl ; X86-NOBMI2-NEXT: shrl %cl, %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: sub63_shiftr32: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movb $63, %al -; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: notb %al ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: sub63_shiftr32: ; X64-NOBMI2: # %bb.0: +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: movl %edi, %eax -; X64-NOBMI2-NEXT: movb $63, %cl -; X64-NOBMI2-NEXT: subb %sil, %cl +; X64-NOBMI2-NEXT: notb %cl +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shrl %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: sub63_shiftr32: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movb $63, %al -; X64-BMI2-NEXT: subb %sil, %al -; X64-BMI2-NEXT: shrxl %eax, %edi, %eax +; X64-BMI2-NEXT: notb %sil +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax ; X64-BMI2-NEXT: retq %adjcnt = sub i32 63, %cnt %result = lshr i32 %val, %adjcnt @@ -507,14 +505,14 @@ ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: xorb $63, %cl +; X86-NOBMI2-NEXT: notb %cl ; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: xor63_shiftl32: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: xorb $63, %al +; X86-BMI2-NEXT: notb %al ; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -522,14 +520,14 @@ ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: movl %edi, %eax -; X64-NOBMI2-NEXT: xorb $63, %cl +; X64-NOBMI2-NEXT: notb %cl ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: xor63_shiftl32: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: xorb $63, %sil +; X64-BMI2-NEXT: notb %sil ; X64-BMI2-NEXT: shlxl %esi, %edi, %eax ; X64-BMI2-NEXT: retq %adjcnt = xor i32 %cnt, 63 @@ -540,16 +538,14 @@ define i32 @xor32_shiftr32(i32 %val, i32 %cnt) nounwind { ; X86-NOBMI2-LABEL: xor32_shiftr32: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: xorb $32, %cl +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: shrl %cl, %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: xor32_shiftr32: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: xorb $32, %al ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -557,14 +553,12 @@ ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: movl %edi, %eax -; X64-NOBMI2-NEXT: xorb $32, %cl ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shrl %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: xor32_shiftr32: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: xorb $32, %sil ; X64-BMI2-NEXT: shrxl %esi, %edi, %eax ; X64-BMI2-NEXT: retq %adjcnt = xor i32 %cnt, 32