diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -3991,17 +3991,49 @@
   // so we are not afraid that we might mess up BZHI/BEXTR pattern.
 
   SDValue NewShiftAmt;
-  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
+  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB ||
+      ShiftAmt->getOpcode() == ISD::XOR) {
     SDValue Add0 = ShiftAmt->getOperand(0);
     SDValue Add1 = ShiftAmt->getOperand(1);
     auto *Add0C = dyn_cast<ConstantSDNode>(Add0);
     auto *Add1C = dyn_cast<ConstantSDNode>(Add1);
-    // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
-    // to avoid the ADD/SUB.
+    // If we are shifting by X+/-/^N where N == 0 mod Size, then just shift by X
+    // to avoid the ADD/SUB/XOR.
     if (Add1C && Add1C->getAPIntValue().urem(Size) == 0) {
       NewShiftAmt = Add0;
-      // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
-      // to generate a NEG instead of a SUB of a constant.
+
+      // If we are doing a NOT on just the lower bits with (Size*N-1) -/^ X
+      // we can replace it with a NOT. In the XOR case it may save some code
+      // size, in the SUB case it also may save a move. ADD is excluded since
+      // X + (Size*N-1) is X-1 mod Size, not ~X; the one-use check avoids
+      // emitting a NOT next to a SUB/XOR that must stay for its other users.
+    } else if (ShiftAmt->getOpcode() != ISD::ADD && ShiftAmt.hasOneUse() &&
+               ((Add0C && Add0C->getAPIntValue().urem(Size) == Size - 1) ||
+                (Add1C && Add1C->getAPIntValue().urem(Size) == Size - 1))) {
+      assert((Add0C == nullptr || Add1C == nullptr) &&
+             "Expected at most one constant operand");
+
+      // SUB is not commutative: only N-X is a NOT of the low bits, whereas
+      // X-N is X+1 mod Size.
+      if (ShiftAmt->getOpcode() == ISD::SUB && Add0C == nullptr)
+        return false;
+
+      // ISelLowering will convert this to NOT already.
+      auto *ConstValOp = Add0C == nullptr ? Add1C : Add0C;
+      if (ConstValOp->isAllOnes())
+        return false;
+
+      EVT OpVT = ShiftAmt.getValueType();
+      SDValue AllOnes = CurDAG->getAllOnesConstant(DL, OpVT);
+      NewShiftAmt = CurDAG->getNode(ISD::XOR, DL, OpVT,
+                                    Add0C == nullptr ? Add0 : Add1, AllOnes);
+
+      // The all-ones constant may be a freshly created node as well, so
+      // register it together with the XOR relative to OrigShiftAmt.
+      insertDAGNode(*CurDAG, OrigShiftAmt, AllOnes);
+      insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
+      // If we are shifting by N-X where N == 0 mod Size, then just shift by
+      // -X to generate a NEG instead of a SUB of a constant.
     } else if (ShiftAmt->getOpcode() == ISD::SUB && Add0C &&
                Add0C->getZExtValue() != 0) {
       EVT SubVT = ShiftAmt.getValueType();
diff --git a/llvm/test/CodeGen/X86/legalize-shift-64.ll b/llvm/test/CodeGen/X86/legalize-shift-64.ll
--- a/llvm/test/CodeGen/X86/legalize-shift-64.ll
+++ b/llvm/test/CodeGen/X86/legalize-shift-64.ll
@@ -10,7 +10,7 @@
 ; CHECK-NEXT:    movl %edx, %eax
 ; CHECK-NEXT:    shll %cl, %eax
 ; CHECK-NEXT:    shrl %edx
-; CHECK-NEXT:    xorb $31, %cl
+; CHECK-NEXT:    notb %cl
 ; CHECK-NEXT:    shrl %cl, %edx
 ; CHECK-NEXT:    retl
   %conv = zext i32 %xx to i64
diff --git a/llvm/test/CodeGen/X86/not-shift.ll b/llvm/test/CodeGen/X86/not-shift.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/not-shift.ll
@@ -0,0 +1,637 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86-NOBMI2
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86-NOBMI2
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86-NOBMI2
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86-BMI2
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86-BMI2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64-NOBMI2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64-NOBMI2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64-NOBMI2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64-BMI2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64-BMI2
+
+
+
+define i64 @sub63_shiftl64(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: sub63_shiftl64:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT:    movb $63, %cl
+; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT:    movl %esi, %eax
+; X86-NOBMI2-NEXT:    shll %cl, %eax
+; X86-NOBMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    je .LBB0_2
+; X86-NOBMI2-NEXT:  # %bb.1:
+; X86-NOBMI2-NEXT:    movl %eax, %edx
+; X86-NOBMI2-NEXT:    xorl %eax, %eax
+; X86-NOBMI2-NEXT:  .LBB0_2:
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: sub63_shiftl64:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movb $63, %cl
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB0_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB0_2:
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: sub63_shiftl64:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notb %cl
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    shlq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: sub63_shiftl64:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    notb %sil
+; X64-BMI2-NEXT:    shlxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    retq
+  %adjcnt = sub i64 63, %cnt
+  %result = shl i64 %val, %adjcnt
+  ret i64 %result
+}
+
+define i64 @xor63_shiftr64(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: xor63_shiftr64:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    xorb $63, %cl
+; X86-NOBMI2-NEXT:    movl %esi, %edx
+; X86-NOBMI2-NEXT:    shrl %cl, %edx
+; X86-NOBMI2-NEXT:    shrdl %cl, %esi, %eax
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    je .LBB1_2
+; X86-NOBMI2-NEXT:  # %bb.1:
+; X86-NOBMI2-NEXT:    movl %edx, %eax
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:  .LBB1_2:
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: xor63_shiftr64:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    xorb $63, %cl
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    shrxl %ecx, %edx, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB1_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edx, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:  .LBB1_2:
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: xor63_shiftr64:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notb %cl
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    shrq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: xor63_shiftr64:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    notb %sil
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    retq
+  %adjcnt = xor i64 %cnt, 63
+  %result = lshr i64 %val, %adjcnt
+  ret i64 %result
+}
+
+define i64 @sub127_shiftl64(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: sub127_shiftl64:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    xorb $127, %cl
+; X86-NOBMI2-NEXT:    movl %esi, %eax
+; X86-NOBMI2-NEXT:    shll %cl, %eax
+; X86-NOBMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    je .LBB2_2
+; X86-NOBMI2-NEXT:  # %bb.1:
+; X86-NOBMI2-NEXT:    movl %eax, %edx
+; X86-NOBMI2-NEXT:    xorl %eax, %eax
+; X86-NOBMI2-NEXT:  .LBB2_2:
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: sub127_shiftl64:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    xorb $127, %cl
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB2_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB2_2:
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: sub127_shiftl64:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notb %cl
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    shlq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: sub127_shiftl64:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    notb %sil
+; X64-BMI2-NEXT:    shlxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    retq
+  %adjcnt = sub i64 127, %cnt
+  %result = shl i64 %val, %adjcnt
+  ret i64 %result
+}
+
+define i64 @xor127_shiftr64(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: xor127_shiftr64:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    xorb $127, %cl
+; X86-NOBMI2-NEXT:    movl %esi, %edx
+; X86-NOBMI2-NEXT:    shrl %cl, %edx
+; X86-NOBMI2-NEXT:    shrdl %cl, %esi, %eax
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    je .LBB3_2
+; X86-NOBMI2-NEXT:  # %bb.1:
+; X86-NOBMI2-NEXT:    movl %edx, %eax
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:  .LBB3_2:
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: xor127_shiftr64:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    xorb $127, %cl
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    shrxl %ecx, %edx, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB3_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edx, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:  .LBB3_2:
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: xor127_shiftr64:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notb %cl
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    shrq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: xor127_shiftr64:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    notb %sil
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    retq
+  %adjcnt = xor i64 %cnt, 127
+  %result = lshr i64 %val, %adjcnt
+  ret i64 %result
+}
+
+define i64 @xor64_shiftl64(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: xor64_shiftl64:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    xorb $64, %cl
+; X86-NOBMI2-NEXT:    movl %esi, %eax
+; X86-NOBMI2-NEXT:    shll %cl, %eax
+; X86-NOBMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    je .LBB4_2
+; X86-NOBMI2-NEXT:  # %bb.1:
+; X86-NOBMI2-NEXT:    movl %eax, %edx
+; X86-NOBMI2-NEXT:    xorl %eax, %eax
+; X86-NOBMI2-NEXT:  .LBB4_2:
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: xor64_shiftl64:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    xorb $64, %cl
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB4_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB4_2:
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: xor64_shiftl64:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    shlq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: xor64_shiftl64:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shlxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    retq
+  %adjcnt = xor i64 %cnt, 64
+  %result = shl i64 %val, %adjcnt
+  ret i64 %result
+}
+
+define i64 @sub1s_shiftr64(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: sub1s_shiftr64:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notb %cl
+; X86-NOBMI2-NEXT:    movl %esi, %edx
+; X86-NOBMI2-NEXT:    shrl %cl, %edx
+; X86-NOBMI2-NEXT:    shrdl %cl, %esi, %eax
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    je .LBB5_2
+; X86-NOBMI2-NEXT:  # %bb.1:
+; X86-NOBMI2-NEXT:    movl %edx, %eax
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:  .LBB5_2:
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: sub1s_shiftr64:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    notb %cl
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    shrxl %ecx, %edx, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB5_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edx, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:  .LBB5_2:
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: sub1s_shiftr64:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notb %cl
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    shrq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: sub1s_shiftr64:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    notb %sil
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    retq
+  %adjcnt = sub i64 -1, %cnt
+  %result = lshr i64 %val, %adjcnt
+  ret i64 %result
+}
+
+define i64 @xor1s_shiftl64(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: xor1s_shiftl64:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notb %cl
+; X86-NOBMI2-NEXT:    movl %esi, %eax
+; X86-NOBMI2-NEXT:    shll %cl, %eax
+; X86-NOBMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    je .LBB6_2
+; X86-NOBMI2-NEXT:  # %bb.1:
+; X86-NOBMI2-NEXT:    movl %eax, %edx
+; X86-NOBMI2-NEXT:    xorl %eax, %eax
+; X86-NOBMI2-NEXT:  .LBB6_2:
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: xor1s_shiftl64:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    notb %cl
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB6_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB6_2:
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: xor1s_shiftl64:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notb %cl
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    shlq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: xor1s_shiftl64:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    notb %sil
+; X64-BMI2-NEXT:    shlxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    retq
+  %adjcnt = xor i64 %cnt, -1
+  %result = shl i64 %val, %adjcnt
+  ret i64 %result
+}
+
+define i32 @sub31_shiftr32(i32 %val, i32 %cnt) nounwind {
+; X86-NOBMI2-LABEL: sub31_shiftr32:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notb %cl
+; X86-NOBMI2-NEXT:    shrl %cl, %eax
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: sub31_shiftr32:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    notb %al
+; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: sub31_shiftr32:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    notb %cl
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    shrl %cl, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: sub31_shiftr32:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    notb %sil
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
+  %adjcnt = sub i32 31, %cnt
+  %result = lshr i32 %val, %adjcnt
+  ret i32 %result
+}
+
+define i32 @xor31_shiftl32(i32 %val, i32 %cnt) nounwind {
+; X86-NOBMI2-LABEL: xor31_shiftl32:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notb %cl
+; X86-NOBMI2-NEXT:    shll %cl, %eax
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: xor31_shiftl32:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    notb %al
+; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: xor31_shiftl32:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    notb %cl
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    shll %cl, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: xor31_shiftl32:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    notb %sil
+; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
+  %adjcnt = xor i32 %cnt, 31
+  %result = shl i32 %val, %adjcnt
+  ret i32 %result
+}
+
+define i32 @sub63_shiftr32(i32 %val, i32 %cnt) nounwind {
+; X86-NOBMI2-LABEL: sub63_shiftr32:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notb %cl
+; X86-NOBMI2-NEXT:    shrl %cl, %eax
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: sub63_shiftr32:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    notb %al
+; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: sub63_shiftr32:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    notb %cl
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    shrl %cl, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: sub63_shiftr32:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    notb %sil
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
+  %adjcnt = sub i32 63, %cnt
+  %result = lshr i32 %val, %adjcnt
+  ret i32 %result
+}
+
+define i32 @xor63_shiftl32(i32 %val, i32 %cnt) nounwind {
+; X86-NOBMI2-LABEL: xor63_shiftl32:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notb %cl
+; X86-NOBMI2-NEXT:    shll %cl, %eax
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: xor63_shiftl32:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    notb %al
+; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: xor63_shiftl32:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    notb %cl
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    shll %cl, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: xor63_shiftl32:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    notb %sil
+; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
+  %adjcnt = xor i32 %cnt, 63
+  %result = shl i32 %val, %adjcnt
+  ret i32 %result
+}
+
+define i32 @xor32_shiftr32(i32 %val, i32 %cnt) nounwind {
+; X86-NOBMI2-LABEL: xor32_shiftr32:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    shrl %cl, %eax
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: xor32_shiftr32:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: xor32_shiftr32:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    shrl %cl, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: xor32_shiftr32:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
+  %adjcnt = xor i32 %cnt, 32
+  %result = lshr i32 %val, %adjcnt
+  ret i32 %result
+}
+
+define i32 @sub1s_shiftl32(i32 %val, i32 %cnt) nounwind {
+; X86-NOBMI2-LABEL: sub1s_shiftl32:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notb %cl
+; X86-NOBMI2-NEXT:    shll %cl, %eax
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: sub1s_shiftl32:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    notb %al
+; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: sub1s_shiftl32:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    notb %cl
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    shll %cl, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: sub1s_shiftl32:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    notb %sil
+; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
+  %adjcnt = sub i32 -1, %cnt
+  %result = shl i32 %val, %adjcnt
+  ret i32 %result
+}
+
+define i32 @xor1s_shiftr32(i32 %val, i32 %cnt) nounwind {
+; X86-NOBMI2-LABEL: xor1s_shiftr32:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notb %cl
+; X86-NOBMI2-NEXT:    shrl %cl, %eax
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: xor1s_shiftr32:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    notb %al
+; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: xor1s_shiftr32:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    notb %cl
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    shrl %cl, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: xor1s_shiftr32:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    notb %sil
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
+  %adjcnt = xor i32 %cnt, -1
+  %result = lshr i32 %val, %adjcnt
+  ret i32 %result
+}