diff --git a/llvm/test/CodeGen/X86/dagcombine-cse.ll b/llvm/test/CodeGen/X86/dagcombine-cse.ll --- a/llvm/test/CodeGen/X86/dagcombine-cse.ll +++ b/llvm/test/CodeGen/X86/dagcombine-cse.ll @@ -41,3 +41,118 @@ %tmp48 = extractelement <4 x i32> %tmp47, i32 0 ; [#uses=1] ret i32 %tmp48 } + +; Test CSE for SDAG nodes with multiple results (UMUL_LOHI): squaring the zero-extended value creates duplicate mul-lo/hi nodes that CSE should merge. +; Pre-committed before the fix, so the generated checks below do not demonstrate the CSE yet. +define i96 @square_high(i96 %x) nounwind { +; X86-LABEL: square_high: +; X86: ## %bb.0: ## %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %esi, %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %esi, %eax +; X86-NEXT: mull %ebp +; X86-NEXT: movl %eax, %edi +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl %edx, %esi +; X86-NEXT: adcl $0, %eax +; X86-NEXT: setb %bl +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: mull %ebp +; X86-NEXT: addl %ecx, %edi +; X86-NEXT: adcl %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movzbl %bl, %ecx +; X86-NEXT: adcl %edx, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: mull %esi +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: movl %edx, (%esp) ## 4-byte Spill +; X86-NEXT: movl %ebp, %esi +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: mull %ebx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl (%esp), %ebx ## 4-byte Folded Reload +; X86-NEXT: adcl $0, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %esi +; X86-NEXT: movl %edx, %esi +; X86-NEXT: addl %ebx, %eax +; X86-NEXT: adcl %ebp, %esi +; X86-NEXT: setb %bl +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %eax +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: 
addl %esi, %ebp +; X86-NEXT: movzbl %bl, %esi +; X86-NEXT: adcl %edx, %esi +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload +; X86-NEXT: adcl %edi, %esi +; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; X86-NEXT: adcl $0, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, (%esp) ## 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl (%esp), %ebx ## 4-byte Folded Reload +; X86-NEXT: adcl $0, %edi +; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; X86-NEXT: adcl %esi, %ebx +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload +; X86-NEXT: adcl $0, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %eax +; X86-NEXT: addl %eax, %edi +; X86-NEXT: adcl %edx, %ecx +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl %edi, %edx +; X86-NEXT: addl $12, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: square_high: +; X64: ## %bb.0: ## %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdx, %r8 +; X64-NEXT: movq %rax, %rsi +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rdi +; X64-NEXT: addq %rdx, %rsi +; X64-NEXT: adcq $0, %r8 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: addq %rsi, %rax +; X64-NEXT: adcq %r8, %rdx +; X64-NEXT: imulq %rcx, %rcx +; X64-NEXT: addq %rcx, %rdx +; X64-NEXT: shrdq $32, %rdx, %rax +; X64-NEXT: shrq $32, %rdx +; X64-NEXT: retq +entry: + %conv = zext i96 %x to i192 + %mul = mul nuw i192 %conv, %conv + %shr = lshr i192 %mul, 96 + %conv2 = trunc i192 %shr to i96 + ret i96 %conv2 +}