diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3854,14 +3854,28 @@ // If we are shifting by N-X where N == 0 mod Size, then just shift by -X // to generate a NEG instead of a SUB of a constant. } else if (ShiftAmt->getOpcode() == ISD::SUB && Add0C && - Add0C->getAPIntValue() != 0 && - Add0C->getAPIntValue().urem(Size) == 0) { + Add0C->getZExtValue() != 0) { + EVT SubVT = ShiftAmt.getValueType(); + SDValue X; + if (Add0C->getZExtValue() % Size == 0) + X = Add1; + else if (Size == 64 && Add0C->getZExtValue() % 32 == 0) { + // We have a 64-bit shift by (n*32-x), turn it into -(x+n*32). + // This is mainly beneficial if we already compute (x+n*32). + if (Add1.getOpcode() == ISD::TRUNCATE) { + Add1 = Add1.getOperand(0); + SubVT = Add1.getValueType(); + } + X = CurDAG->getNode(ISD::ADD, DL, SubVT, Add1, + CurDAG->getZExtOrTrunc(Add0, DL, SubVT)); + insertDAGNode(*CurDAG, OrigShiftAmt, X); + } else + return false; // Insert a negate op. // TODO: This isn't guaranteed to replace the sub if there is a logic cone // that uses it that's not a shift. - EVT SubVT = ShiftAmt.getValueType(); SDValue Zero = CurDAG->getConstant(0, DL, SubVT); - SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1); + SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, X); NewShiftAmt = Neg; // Insert these operands into a valid topological order so they can diff --git a/llvm/test/CodeGen/X86/64-bit-shift-by-32-minus-y.ll b/llvm/test/CodeGen/X86/64-bit-shift-by-32-minus-y.ll --- a/llvm/test/CodeGen/X86/64-bit-shift-by-32-minus-y.ll +++ b/llvm/test/CodeGen/X86/64-bit-shift-by-32-minus-y.ll @@ -8,16 +8,17 @@ ; X64-NOBMI2-LABEL: t0: ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movq %rdi, %rax -; X64-NOBMI2-NEXT: movb $32, %cl -; X64-NOBMI2-NEXT: subb %sil, %cl +; X64-NOBMI2-NEXT: leaq 32(%rsi), %rcx +; X64-NOBMI2-NEXT: negq %rcx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: t0: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movb $32, %al -; X64-BMI2-NEXT: subb %sil, %al -; X64-BMI2-NEXT: shlxq %rax, %rdi, %rax +; X64-BMI2-NEXT: addq $32, %rsi +; X64-BMI2-NEXT: negq %rsi +; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax ; X64-BMI2-NEXT: retq ; ; X32-NOBMI2-LABEL: t0: @@ -228,16 +229,17 @@ ; X64-NOBMI2-LABEL: t4: ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movq %rdi, %rax -; X64-NOBMI2-NEXT: movb $96, %cl -; X64-NOBMI2-NEXT: subb %sil, %cl +; X64-NOBMI2-NEXT: leaq 96(%rsi), %rcx +; X64-NOBMI2-NEXT: negq %rcx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: t4: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movb $96, %al -; X64-BMI2-NEXT: subb %sil, %al -; X64-BMI2-NEXT: shlxq %rax, %rdi, %rax +; X64-BMI2-NEXT: addq $96, %rsi +; X64-BMI2-NEXT: negq %rsi +; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax ; X64-BMI2-NEXT: retq ; ; X32-NOBMI2-LABEL: t4: @@ -282,21 +284,21 @@ define i64 @t5_cse(i64 %val, i64 %shamt, i64*%dst) nounwind { ; X64-NOBMI2-LABEL: t5_cse: ; X64-NOBMI2: # %bb.0: +; X64-NOBMI2-NEXT: movq %rsi, %rcx ; X64-NOBMI2-NEXT: movq %rdi, %rax -; X64-NOBMI2-NEXT: leaq 32(%rsi), %rcx +; X64-NOBMI2-NEXT: addq $32, %rcx ; X64-NOBMI2-NEXT: movq %rcx, (%rdx) -; X64-NOBMI2-NEXT: movb $32, %cl -; X64-NOBMI2-NEXT: subb %sil, %cl +; X64-NOBMI2-NEXT: negq %rcx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: t5_cse: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: leaq 32(%rsi), %rax -; X64-BMI2-NEXT: movq %rax, (%rdx) -; X64-BMI2-NEXT: movb $32, %al -; X64-BMI2-NEXT: subb %sil, %al -; X64-BMI2-NEXT: shlxq %rax, %rdi, %rax +; X64-BMI2-NEXT: addq $32, %rsi +; X64-BMI2-NEXT: movq %rsi, (%rdx) +; X64-BMI2-NEXT: negq %rsi +; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax ; X64-BMI2-NEXT: retq ; ; X32-NOBMI2-LABEL: t5_cse: