diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9288,31 +9288,43 @@
   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
   //   sra (add (shl X, N1C), AddC), N1C -->
   //   sext (add (trunc X to (width - N1C)), AddC')
-  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
-      N0.getOperand(0).getOpcode() == ISD::SHL &&
-      N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
-    if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
-      SDValue Shl = N0.getOperand(0);
-      // Determine what the truncate's type would be and ask the target if that
-      // is a free operation.
-      LLVMContext &Ctx = *DAG.getContext();
-      unsigned ShiftAmt = N1C->getZExtValue();
-      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
-      if (VT.isVector())
-        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
-
-      // TODO: The simple type check probably belongs in the default hook
-      // implementation and/or target-specific overrides (because
-      // non-simple types likely require masking when legalized), but that
-      // restriction may conflict with other transforms.
-      if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
-          TLI.isTruncateFree(VT, TruncVT)) {
-        SDLoc DL(N);
-        SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
-        SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
-                           trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
-        SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
-        return DAG.getSExtOrTrunc(Add, DL, VT);
+  //   sra (sub AddC, (shl X, N1C)), N1C -->
+  //   sext (sub AddC', (trunc X to (width - N1C)))
+  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
+      N0.hasOneUse()) {
+    bool IsAdd = N0.getOpcode() == ISD::ADD;
+    SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
+    if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
+        Shl.hasOneUse()) {
+      if (ConstantSDNode *AddC =
+              isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
+        // Determine what the truncate's type would be and ask the target if
+        // that is a free operation.
+        LLVMContext &Ctx = *DAG.getContext();
+        unsigned ShiftAmt = N1C->getZExtValue();
+        EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+        if (VT.isVector())
+          TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
+
+        // TODO: The simple type check probably belongs in the default hook
+        // implementation and/or target-specific overrides (because
+        // non-simple types likely require masking when legalized), but
+        // that restriction may conflict with other transforms.
+        if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
+            TLI.isTruncateFree(VT, TruncVT)) {
+          SDLoc DL(N);
+          SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+          SDValue ShiftC =
+              DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
+                                  TruncVT.getScalarSizeInBits()),
+                              DL, TruncVT);
+          SDValue Add;
+          if (IsAdd)
+            Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+          else
+            Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
+          return DAG.getSExtOrTrunc(Add, DL, VT);
+        }
       }
     }
   }
diff --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll
--- a/llvm/test/CodeGen/X86/shift-combine.ll
+++ b/llvm/test/CodeGen/X86/shift-combine.ll
@@ -419,6 +419,7 @@
   unreachable
 }
 
+; The mul here is the equivalent of (neg (shl X, 32)).
 define i64 @ashr_add_neg_shl_i32(i64 %r) nounwind {
 ; X32-LABEL: ashr_add_neg_shl_i32:
 ; X32: # %bb.0:
@@ -430,10 +431,9 @@
 ;
 ; X64-LABEL: ashr_add_neg_shl_i32:
 ; X64: # %bb.0:
-; X64-NEXT:    shlq $32, %rdi
-; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
-; X64-NEXT:    subq %rdi, %rax
-; X64-NEXT:    sarq $32, %rax
+; X64-NEXT:    movl $1, %eax
+; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    cltq
 ; X64-NEXT:    retq
   %conv = mul i64 %r, -4294967296
   %sext = add i64 %conv, 4294967296
@@ -441,6 +441,7 @@
   ret i64 %conv1
 }
 
+; The mul here is the equivalent of (neg (shl X, 56)).
 define i64 @ashr_add_neg_shl_i8(i64 %r) nounwind {
 ; X32-LABEL: ashr_add_neg_shl_i8:
 ; X32: # %bb.0:
@@ -455,10 +456,9 @@
 ;
 ; X64-LABEL: ashr_add_neg_shl_i8:
 ; X64: # %bb.0:
-; X64-NEXT:    shlq $56, %rdi
-; X64-NEXT:    movabsq $144115188075855872, %rax # imm = 0x200000000000000
-; X64-NEXT:    subq %rdi, %rax
-; X64-NEXT:    sarq $56, %rax
+; X64-NEXT:    movb $2, %al
+; X64-NEXT:    subb %dil, %al
+; X64-NEXT:    movsbq %al, %rax
 ; X64-NEXT:    retq
   %conv = mul i64 %r, -72057594037927936
   %sext = add i64 %conv, 144115188075855872
@@ -466,42 +466,31 @@
   ret i64 %conv1
 }
 
+; The mul here is the equivalent of (neg (shl X, 24)).
 define <4 x i32> @ashr_add_neg_shl_v4i8(<4 x i32> %r) nounwind {
 ; X32-LABEL: ashr_add_neg_shl_v4i8:
 ; X32: # %bb.0:
-; X32-NEXT:    pushl %ebp
-; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %edi
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    shll $24, %edx
-; X32-NEXT:    shll $24, %esi
-; X32-NEXT:    shll $24, %ebx
-; X32-NEXT:    shll $24, %ebp
-; X32-NEXT:    movl $16777216, %ecx # imm = 0x1000000
-; X32-NEXT:    movl $16777216, %edi # imm = 0x1000000
-; X32-NEXT:    subl %ebp, %edi
-; X32-NEXT:    movl $16777216, %ebp # imm = 0x1000000
-; X32-NEXT:    subl %ebx, %ebp
-; X32-NEXT:    movl $16777216, %ebx # imm = 0x1000000
-; X32-NEXT:    subl %esi, %ebx
-; X32-NEXT:    subl %edx, %ecx
-; X32-NEXT:    sarl $24, %ecx
-; X32-NEXT:    sarl $24, %ebx
-; X32-NEXT:    sarl $24, %ebp
-; X32-NEXT:    sarl $24, %edi
-; X32-NEXT:    movl %edi, 12(%eax)
-; X32-NEXT:    movl %ebp, 8(%eax)
-; X32-NEXT:    movl %ebx, 4(%eax)
-; X32-NEXT:    movl %ecx, (%eax)
+; X32-NEXT:    movb $1, %cl
+; X32-NEXT:    movb $1, %dl
+; X32-NEXT:    subb {{[0-9]+}}(%esp), %dl
+; X32-NEXT:    movsbl %dl, %edx
+; X32-NEXT:    movb $1, %ch
+; X32-NEXT:    subb {{[0-9]+}}(%esp), %ch
+; X32-NEXT:    movsbl %ch, %esi
+; X32-NEXT:    movb $1, %ch
+; X32-NEXT:    subb {{[0-9]+}}(%esp), %ch
+; X32-NEXT:    movsbl %ch, %edi
+; X32-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X32-NEXT:    movsbl %cl, %ecx
+; X32-NEXT:    movl %ecx, 12(%eax)
+; X32-NEXT:    movl %edi, 8(%eax)
+; X32-NEXT:    movl %esi, 4(%eax)
+; X32-NEXT:    movl %edx, (%eax)
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    popl %edi
-; X32-NEXT:    popl %ebx
-; X32-NEXT:    popl %ebp
 ; X32-NEXT:    retl $4
 ;
 ; X64-LABEL: ashr_add_neg_shl_v4i8:
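
Note for reviewers: with AddC' = trunc(AddC >> N1C), the new code path rewrites the subtract at the narrow type and sign-extends the result. A minimal IR sketch (not part of this patch; the function name @src is made up for illustration) that exercises the new sub pattern, mirroring ashr_add_neg_shl_i32 above:

define i64 @src(i64 %x) {
  ; (shl X, 32), used only by the sub, so the one-use checks pass
  %shl = shl i64 %x, 32
  ; AddC = 2^32, so AddC' = trunc((2^32) >> 32) = 1
  %sub = sub i64 4294967296, %shl
  ; combines to: sext (sub i32 1, (trunc %x to i32)) to i64
  %sra = ashr i64 %sub, 32
  ret i64 %sra
}

On X64 this is exactly the movl $1 / subl / cltq sequence in the updated checks, replacing the old shlq/movabsq/subq/sarq at the full width.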