Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -30256,22 +30256,37 @@ } if (!NewMul) { - assert(MulAmt != 0 && MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) - && "Both cases that could cause potential overflows should have " - "already been handled."); - if (isPowerOf2_64(MulAmt - 1)) - // (mul x, 2^N + 1) => (add (shl x, N), x) - NewMul = DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), - DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), - DAG.getConstant(Log2_64(MulAmt - 1), DL, - MVT::i8))); - - else if (isPowerOf2_64(MulAmt + 1)) - // (mul x, 2^N - 1) => (sub (shl x, N), x) - NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::SHL, DL, VT, - N->getOperand(0), - DAG.getConstant(Log2_64(MulAmt + 1), - DL, MVT::i8)), N->getOperand(0)); + assert(MulAmt != 0 && + MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) && + "Both cases that could cause potential overflows should have " + "already been handled."); + int64_t SignMulAmt = C->getSExtValue(); + if ((SignMulAmt != INT64_MIN) && (SignMulAmt != INT64_MAX) && + (SignMulAmt != -INT64_MAX)) { + int NumSign = SignMulAmt > 0 ? 1 : -1; + bool IsPowerOf2_64PlusOne = isPowerOf2_64(NumSign * SignMulAmt - 1); + bool IsPowerOf2_64MinusOne = isPowerOf2_64(NumSign * SignMulAmt + 1); + if (IsPowerOf2_64PlusOne) { + // (mul x, 2^N + 1) => (add (shl x, N), x) + NewMul = DAG.getNode( + ISD::ADD, DL, VT, N->getOperand(0), + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(Log2_64(NumSign * SignMulAmt - 1), DL, + MVT::i8))); + } else if (IsPowerOf2_64MinusOne) { + // (mul x, 2^N - 1) => (sub (shl x, N), x) + NewMul = DAG.getNode( + ISD::SUB, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(Log2_64(NumSign * SignMulAmt + 1), DL, + MVT::i8)), + N->getOperand(0)); + } + // To negate, subtract the number from zero + if ((IsPowerOf2_64PlusOne || IsPowerOf2_64MinusOne) && NumSign == -1) + NewMul = + DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul); + } } if (NewMul) Index: llvm/trunk/test/CodeGen/X86/imul.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/imul.ll +++ llvm/trunk/test/CodeGen/X86/imul.ll @@ -171,3 +171,233 @@ %mul = mul i64 %A, 18446744073709551615 ret i64 %mul } + +define i32 @test(i32 %a) { +; X64-LABEL: test: +; X64: # BB#0: # %entry +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $5, %eax +; X64-NEXT: subl %edi, %eax +; X64-NEXT: retq +; +; X86-LABEL: test: +; X86: # BB#0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shll $5, %eax +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: retl +entry: + %tmp3 = mul i32 %a, 31 + ret i32 %tmp3 +} + +define i32 @test1(i32 %a) { +; X64-LABEL: test1: +; X64: # BB#0: # %entry +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $5, %eax +; X64-NEXT: subl %edi, %eax +; X64-NEXT: negl %eax +; X64-NEXT: retq +; +; X86-LABEL: test1: +; X86: # BB#0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shll $5, %eax +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: negl %eax +; X86-NEXT: retl +entry: + %tmp3 = mul i32 %a, -31 + ret i32 %tmp3 +} + + +define i32 @test2(i32 %a) { +; X64-LABEL: test2: +; X64: # BB#0: # %entry +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $5, %eax +; X64-NEXT: leal (%rax,%rdi), %eax +; X64-NEXT: retq +; +; X86-LABEL: test2: +; X86: # BB#0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shll $5, %eax +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: retl +entry: + %tmp3 = mul i32 %a, 33 + ret i32 %tmp3 +} + +define i32 @test3(i32 %a) { +; X64-LABEL: test3: +; X64: # BB#0: # %entry +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $5, %eax +; X64-NEXT: leal (%rax,%rdi), %eax +; X64-NEXT: negl %eax +; X64-NEXT: retq +; +; X86-LABEL: test3: +; X86: # BB#0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shll $5, %eax +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: negl %eax +; X86-NEXT: retl +entry: + %tmp3 = mul i32 %a, -33 + ret i32 %tmp3 +} + +define i64 @test4(i64 %a) { +; X64-LABEL: test4: +; X64: # BB#0: # %entry +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shlq $5, %rax +; X64-NEXT: subq %rdi, %rax +; X64-NEXT: retq +; +; X86-LABEL: test4: +; X86: # BB#0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll $5, %ecx +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: movl $31, %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: retl +entry: + %tmp3 = mul i64 %a, 31 + ret i64 %tmp3 +} + +define i64 @test5(i64 %a) { +; X64-LABEL: test5: +; X64: # BB#0: # %entry +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shlq $5, %rax +; X64-NEXT: subq %rdi, %rax +; X64-NEXT: negq %rax +; X64-NEXT: retq +; +; X86-LABEL: test5: +; X86: # BB#0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: .Lcfi0: +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .Lcfi1: +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %esi +; X86-NEXT: shll $5, %esi +; X86-NEXT: subl %eax, %esi +; X86-NEXT: movl $-31, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: subl %ecx, %edx +; X86-NEXT: subl %esi, %edx +; X86-NEXT: popl %esi +; X86-NEXT: retl +entry: + %tmp3 = mul i64 %a, -31 + ret i64 %tmp3 +} + + +define i64 @test6(i64 %a) { +; X64-LABEL: test6: +; X64: # BB#0: # %entry +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shlq $5, %rax +; X64-NEXT: leaq (%rax,%rdi), %rax +; X64-NEXT: retq +; +; X86-LABEL: test6: +; X86: # BB#0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll $5, %ecx +; X86-NEXT: addl %eax, %ecx +; X86-NEXT: movl $33, %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: retl +entry: + %tmp3 = mul i64 %a, 33 + ret i64 %tmp3 +} + +define i64 @test7(i64 %a) { +; X64-LABEL: test7: +; X64: # BB#0: # %entry +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shlq $5, %rax +; X64-NEXT: leaq (%rax,%rdi), %rax +; X64-NEXT: negq %rax +; X64-NEXT: retq +; +; X86-LABEL: test7: +; X86: # BB#0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: .Lcfi2: +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .Lcfi3: +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %esi +; X86-NEXT: shll $5, %esi +; X86-NEXT: addl %eax, %esi +; X86-NEXT: movl $-33, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: subl %ecx, %edx +; X86-NEXT: subl %esi, %edx +; X86-NEXT: popl %esi +; X86-NEXT: retl +entry: + %tmp3 = mul i64 %a, -33 + ret i64 %tmp3 +} + +define i64 @testOverflow(i64 %a) { +; X64-LABEL: testOverflow: +; X64: # BB#0: # %entry +; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF +; X64-NEXT: imulq %rdi, %rax +; X64-NEXT: retq +; +; X86-LABEL: testOverflow: +; X86: # BB#0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: .Lcfi4: +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .Lcfi5: +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $-1, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: shll $31, %esi +; X86-NEXT: subl %ecx, %esi +; X86-NEXT: addl %esi, %edx +; X86-NEXT: subl {{[0-9]+}}(%esp), %edx +; X86-NEXT: popl %esi +; X86-NEXT: retl +entry: + %tmp3 = mul i64 %a, 9223372036854775807 + ret i64 %tmp3 +}