diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -4758,17 +4758,24 @@ unsigned Opc, MOpc; unsigned LoReg, HiReg; bool IsSigned = Opcode == ISD::SMUL_LOHI; + bool UseMULX = !IsSigned && Subtarget->hasBMI2(); switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i32: - Opc = IsSigned ? X86::IMUL32r : X86::MUL32r; - MOpc = IsSigned ? X86::IMUL32m : X86::MUL32m; - LoReg = X86::EAX; HiReg = X86::EDX; + Opc = UseMULX ? X86::MULX32rr : + IsSigned ? X86::IMUL32r : X86::MUL32r; + MOpc = UseMULX ? X86::MULX32rm : + IsSigned ? X86::IMUL32m : X86::MUL32m; + LoReg = UseMULX ? X86::EDX : X86::EAX; + HiReg = X86::EDX; break; case MVT::i64: - Opc = IsSigned ? X86::IMUL64r : X86::MUL64r; - MOpc = IsSigned ? X86::IMUL64m : X86::MUL64m; - LoReg = X86::RAX; HiReg = X86::RDX; + Opc = UseMULX ? X86::MULX64rr : + IsSigned ? X86::IMUL64r : X86::MUL64r; + MOpc = UseMULX ? X86::MULX64rm : + IsSigned ? X86::IMUL64m : X86::MUL64m; + LoReg = UseMULX ? 
X86::RDX : X86::RAX; + HiReg = X86::RDX; break; } @@ -4783,15 +4790,24 @@ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, N0, SDValue()).getValue(1); + SDValue ResHi, ResLo; if (foldedLoad) { SDValue Chain; MachineSDNode *CNode = nullptr; SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; - SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); - CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); - Chain = SDValue(CNode, 0); - InFlag = SDValue(CNode, 1); + if (UseMULX) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other); + CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + Chain = SDValue(CNode, 2); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); + CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); + Chain = SDValue(CNode, 0); + InFlag = SDValue(CNode, 1); + } // Update the chain. ReplaceUses(N1.getValue(1), Chain); @@ -4799,27 +4815,38 @@ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()}); } else { SDValue Ops[] = { N1, InFlag }; - SDVTList VTs = CurDAG->getVTList(MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); - InFlag = SDValue(CNode, 0); + if (UseMULX) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); + InFlag = SDValue(CNode, 0); + } } // Copy the low half of the result, if it is needed. 
if (!SDValue(Node, 0).use_empty()) { - assert(LoReg && "Register for low half is not defined!"); - SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, - NVT, InFlag); - InFlag = ResLo.getValue(2); + if (!ResLo) { + assert(LoReg && "Register for low half is not defined!"); + ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, + NVT, InFlag); + InFlag = ResLo.getValue(2); + } ReplaceUses(SDValue(Node, 0), ResLo); LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the high half of the result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - assert(HiReg && "Register for high half is not defined!"); - SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, - NVT, InFlag); - InFlag = ResHi.getValue(2); + if (!ResHi) { + assert(HiReg && "Register for high half is not defined!"); + ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, + NVT, InFlag); + InFlag = ResHi.getValue(2); + } ReplaceUses(SDValue(Node, 1), ResHi); LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll --- a/llvm/test/CodeGen/X86/atomic-unordered.ll +++ b/llvm/test/CodeGen/X86/atomic-unordered.ll @@ -837,18 +837,16 @@ ; ; CHECK-O3-CUR-LABEL: load_fold_udiv1: ; CHECK-O3-CUR: # %bb.0: -; CHECK-O3-CUR-NEXT: movq (%rdi), %rax -; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 -; CHECK-O3-CUR-NEXT: mulq %rcx -; CHECK-O3-CUR-NEXT: movq %rdx, %rax +; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx +; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 +; CHECK-O3-CUR-NEXT: mulxq %rax, %rcx, %rax ; CHECK-O3-CUR-NEXT: shrq $3, %rax ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: load_fold_udiv1: ; CHECK-O3-EX: # %bb.0: -; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 -; CHECK-O3-EX-NEXT: mulq 
(%rdi) -; CHECK-O3-EX-NEXT: movq %rdx, %rax +; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 +; CHECK-O3-EX-NEXT: mulxq (%rdi), %rcx, %rax ; CHECK-O3-EX-NEXT: shrq $3, %rax ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 @@ -1033,15 +1031,14 @@ ; ; CHECK-O3-LABEL: load_fold_urem1: ; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq (%rdi), %rcx -; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 -; CHECK-O3-NEXT: movq %rcx, %rax -; CHECK-O3-NEXT: mulq %rdx +; CHECK-O3-NEXT: movq (%rdi), %rax +; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 +; CHECK-O3-NEXT: movq %rax, %rdx +; CHECK-O3-NEXT: mulxq %rcx, %rcx, %rdx ; CHECK-O3-NEXT: shrq $3, %rdx -; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rax -; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax -; CHECK-O3-NEXT: subq %rax, %rcx -; CHECK-O3-NEXT: movq %rcx, %rax +; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rcx +; CHECK-O3-NEXT: leaq (%rcx,%rcx,2), %rcx +; CHECK-O3-NEXT: subq %rcx, %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = urem i64 %v, 15 @@ -1694,28 +1691,28 @@ define void @rmw_fold_udiv1(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_udiv1: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 -; CHECK-O0-NEXT: mulq %rcx -; CHECK-O0-NEXT: shrq $3, %rdx -; CHECK-O0-NEXT: movq %rdx, (%rdi) +; CHECK-O0-NEXT: movq (%rdi), %rdx +; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 +; CHECK-O0-NEXT: mulxq %rax, %rcx, %rax +; CHECK-O0-NEXT: shrq $3, %rax +; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: rmw_fold_udiv1: ; CHECK-O3-CUR: # %bb.0: -; CHECK-O3-CUR-NEXT: movq (%rdi), %rax -; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 -; CHECK-O3-CUR-NEXT: mulq %rcx -; CHECK-O3-CUR-NEXT: shrq $3, %rdx -; 
CHECK-O3-CUR-NEXT: movq %rdx, (%rdi) +; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx +; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 +; CHECK-O3-CUR-NEXT: mulxq %rax, %rax, %rcx +; CHECK-O3-CUR-NEXT: shrq $3, %rcx +; CHECK-O3-CUR-NEXT: movq %rcx, (%rdi) ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: rmw_fold_udiv1: ; CHECK-O3-EX: # %bb.0: -; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 -; CHECK-O3-EX-NEXT: mulq (%rdi) -; CHECK-O3-EX-NEXT: shrq $3, %rdx -; CHECK-O3-EX-NEXT: movq %rdx, (%rdi) +; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 +; CHECK-O3-EX-NEXT: mulxq (%rdi), %rax, %rcx +; CHECK-O3-EX-NEXT: shrq $3, %rcx +; CHECK-O3-EX-NEXT: movq %rcx, (%rdi) ; CHECK-O3-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = udiv i64 %prev, 15 @@ -1842,27 +1839,25 @@ ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 -; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: mulq %rcx -; CHECK-O0-NEXT: shrq $3, %rdx -; CHECK-O0-NEXT: leaq (%rdx,%rdx,4), %rax -; CHECK-O0-NEXT: leaq (%rax,%rax,2), %rax -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; CHECK-O0-NEXT: subq %rax, %rcx -; CHECK-O0-NEXT: movq %rcx, (%rdi) +; CHECK-O0-NEXT: movq %rax, %rdx +; CHECK-O0-NEXT: mulxq %rcx, %rdx, %rcx +; CHECK-O0-NEXT: shrq $3, %rcx +; CHECK-O0-NEXT: leaq (%rcx,%rcx,4), %rcx +; CHECK-O0-NEXT: leaq (%rcx,%rcx,2), %rcx +; CHECK-O0-NEXT: subq %rcx, %rax +; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_urem1: ; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq (%rdi), %rcx -; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 -; CHECK-O3-NEXT: movq %rcx, %rax -; CHECK-O3-NEXT: mulq %rdx -; CHECK-O3-NEXT: shrq $3, %rdx -; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rax +; CHECK-O3-NEXT: 
movq (%rdi), %rdx +; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 +; CHECK-O3-NEXT: mulxq %rax, %rax, %rcx +; CHECK-O3-NEXT: shrq $3, %rcx +; CHECK-O3-NEXT: leaq (%rcx,%rcx,4), %rax ; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax -; CHECK-O3-NEXT: subq %rax, %rcx -; CHECK-O3-NEXT: movq %rcx, (%rdi) +; CHECK-O3-NEXT: subq %rax, %rdx +; CHECK-O3-NEXT: movq %rdx, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = urem i64 %prev, 15 diff --git a/llvm/test/CodeGen/X86/bmi2-x86_64.ll b/llvm/test/CodeGen/X86/bmi2-x86_64.ll --- a/llvm/test/CodeGen/X86/bmi2-x86_64.ll +++ b/llvm/test/CodeGen/X86/bmi2-x86_64.ll @@ -68,8 +68,8 @@ ; CHECK-LABEL: mulx64: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdx, %rcx -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: mulq %rsi +; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: mulxq %rsi, %rax, %rdx ; CHECK-NEXT: movq %rdx, (%rcx) ; CHECK-NEXT: retq %x1 = zext i64 %x to i128 @@ -86,8 +86,8 @@ ; CHECK-LABEL: mulx64_load: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdx, %rcx -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: mulq (%rsi) +; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: mulxq (%rsi), %rax, %rdx ; CHECK-NEXT: movq %rdx, (%rcx) ; CHECK-NEXT: retq %y1 = load i64, i64* %y diff --git a/llvm/test/CodeGen/X86/bmi2.ll b/llvm/test/CodeGen/X86/bmi2.ll --- a/llvm/test/CodeGen/X86/bmi2.ll +++ b/llvm/test/CodeGen/X86/bmi2.ll @@ -120,11 +120,11 @@ ; X86-LABEL: mulx32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: addl %eax, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: addl %edx, %edx -; X86-NEXT: mull %edx +; X86-NEXT: addl %eax, %eax +; X86-NEXT: mulxl %eax, %eax, %edx ; X86-NEXT: movl %edx, (%ecx) ; X86-NEXT: retl ; @@ -156,10 +156,10 @@ ; X86-LABEL: mulx32_load: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl 
{{[0-9]+}}(%esp), %eax -; X86-NEXT: addl %eax, %eax -; X86-NEXT: mull (%edx) +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: addl %edx, %edx +; X86-NEXT: mulxl (%eax), %eax, %edx ; X86-NEXT: movl %edx, (%ecx) ; X86-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/hoist-invariant-load.ll b/llvm/test/CodeGen/X86/hoist-invariant-load.ll --- a/llvm/test/CodeGen/X86/hoist-invariant-load.ll +++ b/llvm/test/CodeGen/X86/hoist-invariant-load.ll @@ -215,22 +215,21 @@ define void @test_multi_def(i64* dereferenceable(8) %x1, ; CHECK-LABEL: test_multi_def: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: movq %rdx, %r8 -; CHECK-NEXT: xorl %r9d, %r9d -; CHECK-NEXT: movq (%rdi), %rdi -; CHECK-NEXT: movq (%rsi), %rsi +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: xorl %r8d, %r8d +; CHECK-NEXT: movq (%rdi), %rdx +; CHECK-NEXT: movq (%rsi), %r9 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB4_2: ## %for.body ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: mulq %rsi -; CHECK-NEXT: addq %rax, (%r8) -; CHECK-NEXT: adcq %rdx, 8(%r8) +; CHECK-NEXT: mulxq %r9, %rsi, %rdi +; CHECK-NEXT: addq %rsi, (%rax) +; CHECK-NEXT: adcq %rdi, 8(%rax) ; CHECK-NEXT: ## %bb.1: ## %for.check ; CHECK-NEXT: ## in Loop: Header=BB4_2 Depth=1 -; CHECK-NEXT: incq %r9 -; CHECK-NEXT: addq $16, %r8 -; CHECK-NEXT: cmpq %rcx, %r9 +; CHECK-NEXT: incq %r8 +; CHECK-NEXT: addq $16, %rax +; CHECK-NEXT: cmpq %rcx, %r8 ; CHECK-NEXT: jl LBB4_2 ; CHECK-NEXT: ## %bb.3: ## %exit ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/i128-mul.ll b/llvm/test/CodeGen/X86/i128-mul.ll --- a/llvm/test/CodeGen/X86/i128-mul.ll +++ b/llvm/test/CodeGen/X86/i128-mul.ll @@ -7,48 +7,86 @@ ; PR1198 define i64 @foo(i64 %x, i64 %y) nounwind { -; X86-LABEL: foo: -; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; 
X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: mull %ebx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull %ebx -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %edi, %ebp -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: addl %ebp, %eax -; X86-NEXT: adcl %ebx, %ecx -; X86-NEXT: setb %al -; X86-NEXT: movzbl %al, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %esi -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: adcl %edi, %edx -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp -; X86-NEXT: retl +; X86-NOBMI-LABEL: foo: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebp +; X86-NOBMI-NEXT: pushl %ebx +; X86-NOBMI-NEXT: pushl %edi +; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: movl %ecx, %eax +; X86-NOBMI-NEXT: mull %ebx +; X86-NOBMI-NEXT: movl %edx, %edi +; X86-NOBMI-NEXT: movl %ebp, %eax +; X86-NOBMI-NEXT: mull %ebx +; X86-NOBMI-NEXT: movl %edx, %ebx +; X86-NOBMI-NEXT: movl %eax, %ebp +; X86-NOBMI-NEXT: addl %edi, %ebp +; X86-NOBMI-NEXT: adcl $0, %ebx +; X86-NOBMI-NEXT: movl %ecx, %eax +; X86-NOBMI-NEXT: mull %esi +; X86-NOBMI-NEXT: movl %edx, %ecx +; X86-NOBMI-NEXT: addl %ebp, %eax +; X86-NOBMI-NEXT: adcl %ebx, %ecx +; X86-NOBMI-NEXT: setb %al +; X86-NOBMI-NEXT: movzbl %al, %edi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: mull %esi +; X86-NOBMI-NEXT: addl %ecx, %eax +; X86-NOBMI-NEXT: adcl %edi, %edx +; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx +; X86-NOBMI-NEXT: popl %ebp +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: foo: -; X64: # %bb.0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, 
%rax -; X64-NEXT: retq +; X86-BMI-LABEL: foo: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %ebp +; X86-BMI-NEXT: pushl %ebx +; X86-BMI-NEXT: pushl %edi +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: mulxl %esi, %edx, %ebx +; X86-BMI-NEXT: movl %ecx, %edx +; X86-BMI-NEXT: mulxl %esi, %esi, %ebp +; X86-BMI-NEXT: addl %ebx, %esi +; X86-BMI-NEXT: adcl $0, %ebp +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: mulxl %edi, %eax, %ebx +; X86-BMI-NEXT: addl %esi, %eax +; X86-BMI-NEXT: adcl %ebp, %ebx +; X86-BMI-NEXT: setb %al +; X86-BMI-NEXT: movzbl %al, %esi +; X86-BMI-NEXT: movl %ecx, %edx +; X86-BMI-NEXT: mulxl %edi, %eax, %edx +; X86-BMI-NEXT: addl %ebx, %eax +; X86-BMI-NEXT: adcl %esi, %edx +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: popl %edi +; X86-BMI-NEXT: popl %ebx +; X86-BMI-NEXT: popl %ebp +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: foo: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rdi, %rax +; X64-NOBMI-NEXT: mulq %rsi +; X64-NOBMI-NEXT: movq %rdx, %rax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: foo: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: movq %rdi, %rdx +; X64-BMI-NEXT: mulxq %rsi, %rcx, %rax +; X64-BMI-NEXT: retq %tmp0 = zext i64 %x to i128 %tmp1 = zext i64 %y to i128 %tmp2 = mul i128 %tmp0, %tmp1 @@ -62,107 +100,202 @@ ; zero-extended value. 
define i64 @mul1(i64 %n, i64* nocapture %z, i64* nocapture %x, i64 %y) nounwind { -; X86-LABEL: mul1: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl %ecx, %eax -; X86-NEXT: je .LBB1_3 -; X86-NEXT: # %bb.1: # %for.body.preheader -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: xorl %ebp, %ebp -; X86-NEXT: movl $0, (%esp) # 4-byte Folded Spill -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB1_2: # %for.body -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl (%eax,%ebp,8), %esi -; X86-NEXT: movl 4(%eax,%ebp,8), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: adcl $0, %ecx -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: mull %edx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %ebx, %edi -; X86-NEXT: adcl %ecx, %esi -; X86-NEXT: setb %bl -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: addl %esi, %eax -; X86-NEXT: movzbl %bl, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: adcl %esi, %edx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 
4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT: adcl $0, %eax -; X86-NEXT: adcl $0, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %ecx, (%esi,%ebp,8) -; X86-NEXT: movl %edi, 4(%esi,%ebp,8) -; X86-NEXT: addl $1, %ebp -; X86-NEXT: movl (%esp), %edi # 4-byte Reload -; X86-NEXT: adcl $0, %edi -; X86-NEXT: movl %ebp, %esi -; X86-NEXT: xorl %ebx, %esi -; X86-NEXT: movl %edi, (%esp) # 4-byte Spill -; X86-NEXT: xorl {{[0-9]+}}(%esp), %edi -; X86-NEXT: orl %esi, %edi -; X86-NEXT: jne .LBB1_2 -; X86-NEXT: .LBB1_3: # %for.end -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: addl $24, %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp -; X86-NEXT: retl +; X86-NOBMI-LABEL: mul1: +; X86-NOBMI: # %bb.0: # %entry +; X86-NOBMI-NEXT: pushl %ebp +; X86-NOBMI-NEXT: pushl %ebx +; X86-NOBMI-NEXT: pushl %edi +; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: subl $24, %esp +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: orl %ecx, %eax +; X86-NOBMI-NEXT: je .LBB1_3 +; X86-NOBMI-NEXT: # %bb.1: # %for.body.preheader +; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: xorl %edx, %edx +; X86-NOBMI-NEXT: xorl %ebp, %ebp +; X86-NOBMI-NEXT: movl $0, (%esp) # 4-byte Folded Spill +; X86-NOBMI-NEXT: .p2align 4, 0x90 +; X86-NOBMI-NEXT: .LBB1_2: # %for.body +; X86-NOBMI-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: movl (%eax,%ebp,8), %esi +; X86-NOBMI-NEXT: movl 4(%eax,%ebp,8), %ecx +; X86-NOBMI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI-NEXT: mull %edi +; X86-NOBMI-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI-NEXT: movl %ecx, %eax +; X86-NOBMI-NEXT: mull %edi +; X86-NOBMI-NEXT: movl %edx, %ecx +; X86-NOBMI-NEXT: movl %eax, %ebx +; X86-NOBMI-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NOBMI-NEXT: adcl $0, %ecx +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: mull %edx +; X86-NOBMI-NEXT: movl %edx, %esi +; X86-NOBMI-NEXT: movl %eax, %edi +; X86-NOBMI-NEXT: addl %ebx, %edi +; X86-NOBMI-NEXT: adcl %ecx, %esi +; X86-NOBMI-NEXT: setb %bl +; X86-NOBMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOBMI-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: addl %esi, %eax +; X86-NOBMI-NEXT: movzbl %bl, %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI-NEXT: adcl %esi, %edx +; X86-NOBMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NOBMI-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NOBMI-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NOBMI-NEXT: adcl $0, %eax +; X86-NOBMI-NEXT: adcl $0, %edx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: movl %ecx, (%esi,%ebp,8) +; X86-NOBMI-NEXT: movl %edi, 4(%esi,%ebp,8) +; X86-NOBMI-NEXT: addl $1, %ebp +; X86-NOBMI-NEXT: movl (%esp), %edi # 4-byte Reload +; X86-NOBMI-NEXT: adcl $0, %edi +; X86-NOBMI-NEXT: movl %ebp, %esi +; X86-NOBMI-NEXT: xorl %ebx, %esi +; X86-NOBMI-NEXT: movl %edi, (%esp) # 4-byte Spill +; X86-NOBMI-NEXT: xorl {{[0-9]+}}(%esp), %edi +; X86-NOBMI-NEXT: orl %esi, %edi +; X86-NOBMI-NEXT: jne .LBB1_2 +; X86-NOBMI-NEXT: .LBB1_3: # %for.end +; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: xorl %edx, %edx +; X86-NOBMI-NEXT: addl $24, %esp +; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx +; X86-NOBMI-NEXT: popl %ebp +; X86-NOBMI-NEXT: retl +; +; X86-BMI-LABEL: mul1: +; 
X86-BMI: # %bb.0: # %entry +; X86-BMI-NEXT: pushl %ebp +; X86-BMI-NEXT: pushl %ebx +; X86-BMI-NEXT: pushl %edi +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: subl $16, %esp +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: orl %ecx, %eax +; X86-BMI-NEXT: je .LBB1_3 +; X86-BMI-NEXT: # %bb.1: # %for.body.preheader +; X86-BMI-NEXT: xorl %ecx, %ecx +; X86-BMI-NEXT: xorl %edx, %edx +; X86-BMI-NEXT: xorl %ebx, %ebx +; X86-BMI-NEXT: xorl %ebp, %ebp +; X86-BMI-NEXT: .p2align 4, 0x90 +; X86-BMI-NEXT: .LBB1_2: # %for.body +; X86-BMI-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl (%ecx,%ebx,8), %eax +; X86-BMI-NEXT: movl 4(%ecx,%ebx,8), %esi +; X86-BMI-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: mulxl %ecx, %edx, %edi +; X86-BMI-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI-NEXT: movl %esi, %edx +; X86-BMI-NEXT: mulxl %ecx, %esi, %ecx +; X86-BMI-NEXT: addl %edi, %esi +; X86-BMI-NEXT: adcl $0, %ecx +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: mulxl {{[0-9]+}}(%esp), %edi, %eax +; X86-BMI-NEXT: addl %esi, %edi +; X86-BMI-NEXT: adcl %ecx, %eax +; X86-BMI-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-BMI-NEXT: mulxl {{[0-9]+}}(%esp), %ecx, %edx +; X86-BMI-NEXT: setb (%esp) # 1-byte Folded Spill +; X86-BMI-NEXT: addl %eax, %ecx +; X86-BMI-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload +; X86-BMI-NEXT: adcl %eax, %edx +; X86-BMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-BMI-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-BMI-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-BMI-NEXT: adcl $0, %ecx +; X86-BMI-NEXT: adcl $0, %edx +; X86-BMI-NEXT: 
movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl %esi, (%eax,%ebx,8) +; X86-BMI-NEXT: movl %edi, 4(%eax,%ebx,8) +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI-NEXT: addl $1, %ebx +; X86-BMI-NEXT: adcl $0, %ebp +; X86-BMI-NEXT: movl %ebx, %eax +; X86-BMI-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl %ebp, %esi +; X86-BMI-NEXT: xorl %edi, %esi +; X86-BMI-NEXT: orl %eax, %esi +; X86-BMI-NEXT: jne .LBB1_2 +; X86-BMI-NEXT: .LBB1_3: # %for.end +; X86-BMI-NEXT: xorl %eax, %eax +; X86-BMI-NEXT: xorl %edx, %edx +; X86-BMI-NEXT: addl $16, %esp +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: popl %edi +; X86-BMI-NEXT: popl %ebx +; X86-BMI-NEXT: popl %ebp +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: mul1: +; X64-NOBMI: # %bb.0: # %entry +; X64-NOBMI-NEXT: testq %rdi, %rdi +; X64-NOBMI-NEXT: je .LBB1_3 +; X64-NOBMI-NEXT: # %bb.1: # %for.body.preheader +; X64-NOBMI-NEXT: movq %rcx, %r8 +; X64-NOBMI-NEXT: movq %rdx, %r9 +; X64-NOBMI-NEXT: xorl %r10d, %r10d +; X64-NOBMI-NEXT: xorl %ecx, %ecx +; X64-NOBMI-NEXT: .p2align 4, 0x90 +; X64-NOBMI-NEXT: .LBB1_2: # %for.body +; X64-NOBMI-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI-NEXT: movq %r8, %rax +; X64-NOBMI-NEXT: mulq (%r9,%rcx,8) +; X64-NOBMI-NEXT: addq %r10, %rax +; X64-NOBMI-NEXT: adcq $0, %rdx +; X64-NOBMI-NEXT: movq %rax, (%rsi,%rcx,8) +; X64-NOBMI-NEXT: incq %rcx +; X64-NOBMI-NEXT: cmpq %rcx, %rdi +; X64-NOBMI-NEXT: movq %rdx, %r10 +; X64-NOBMI-NEXT: jne .LBB1_2 +; X64-NOBMI-NEXT: .LBB1_3: # %for.end +; X64-NOBMI-NEXT: xorl %eax, %eax +; X64-NOBMI-NEXT: retq ; -; X64-LABEL: mul1: -; X64: # %bb.0: # %entry -; X64-NEXT: testq %rdi, %rdi -; X64-NEXT: je .LBB1_3 -; X64-NEXT: # %bb.1: # %for.body.preheader -; X64-NEXT: movq %rcx, %r8 -; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: xorl %r10d, %r10d -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB1_2: # %for.body -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq (%r9,%rcx,8) -; X64-NEXT: 
addq %r10, %rax -; X64-NEXT: adcq $0, %rdx -; X64-NEXT: movq %rax, (%rsi,%rcx,8) -; X64-NEXT: incq %rcx -; X64-NEXT: cmpq %rcx, %rdi -; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: jne .LBB1_2 -; X64-NEXT: .LBB1_3: # %for.end -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: retq +; X64-BMI-LABEL: mul1: +; X64-BMI: # %bb.0: # %entry +; X64-BMI-NEXT: testq %rdi, %rdi +; X64-BMI-NEXT: je .LBB1_3 +; X64-BMI-NEXT: # %bb.1: # %for.body.preheader +; X64-BMI-NEXT: movq %rcx, %r8 +; X64-BMI-NEXT: movq %rdx, %r9 +; X64-BMI-NEXT: xorl %r10d, %r10d +; X64-BMI-NEXT: xorl %ecx, %ecx +; X64-BMI-NEXT: .p2align 4, 0x90 +; X64-BMI-NEXT: .LBB1_2: # %for.body +; X64-BMI-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI-NEXT: movq %r8, %rdx +; X64-BMI-NEXT: mulxq (%r9,%rcx,8), %rax, %rdx +; X64-BMI-NEXT: addq %r10, %rax +; X64-BMI-NEXT: adcq $0, %rdx +; X64-BMI-NEXT: movq %rax, (%rsi,%rcx,8) +; X64-BMI-NEXT: incq %rcx +; X64-BMI-NEXT: cmpq %rcx, %rdi +; X64-BMI-NEXT: movq %rdx, %r10 +; X64-BMI-NEXT: jne .LBB1_2 +; X64-BMI-NEXT: .LBB1_3: # %for.end +; X64-BMI-NEXT: xorl %eax, %eax +; X64-BMI-NEXT: retq entry: %conv = zext i64 %y to i128 %cmp11 = icmp eq i64 %n, 0 diff --git a/llvm/test/CodeGen/X86/mulx32.ll b/llvm/test/CodeGen/X86/mulx32.ll --- a/llvm/test/CodeGen/X86/mulx32.ll +++ b/llvm/test/CodeGen/X86/mulx32.ll @@ -5,8 +5,8 @@ define i64 @f1(i32 %a, i32 %b) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: mull {{[0-9]+}}(%esp) +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: mulxl {{[0-9]+}}(%esp), %eax, %edx ; CHECK-NEXT: retl %x = zext i32 %a to i64 %y = zext i32 %b to i64 @@ -17,9 +17,9 @@ define i64 @f2(i32 %a, i32* %p) { ; CHECK-LABEL: f2: ; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: mull (%ecx) +; CHECK-NEXT: mulxl (%eax), %eax, %edx ; CHECK-NEXT: retl %b = load i32, i32* %p %x = zext i32 %a to i64 diff --git 
a/llvm/test/CodeGen/X86/mulx64.ll b/llvm/test/CodeGen/X86/mulx64.ll --- a/llvm/test/CodeGen/X86/mulx64.ll +++ b/llvm/test/CodeGen/X86/mulx64.ll @@ -5,8 +5,8 @@ define i128 @f1(i64 %a, i64 %b) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: mulq %rsi +; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: mulxq %rsi, %rax, %rdx ; CHECK-NEXT: retq %x = zext i64 %a to i128 %y = zext i64 %b to i128 @@ -17,8 +17,8 @@ define i128 @f2(i64 %a, i64* %p) { ; CHECK-LABEL: f2: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: mulq (%rsi) +; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: mulxq (%rsi), %rax, %rdx ; CHECK-NEXT: retq %b = load i64, i64* %p %x = zext i64 %a to i128 diff --git a/llvm/test/CodeGen/X86/pr35636.ll b/llvm/test/CodeGen/X86/pr35636.ll --- a/llvm/test/CodeGen/X86/pr35636.ll +++ b/llvm/test/CodeGen/X86/pr35636.ll @@ -5,11 +5,11 @@ define void @_Z15uint64_to_asciimPc(i64 %arg) { ; HSW-LABEL: _Z15uint64_to_asciimPc: ; HSW: # %bb.0: # %bb -; HSW-NEXT: movq %rdi, %rax -; HSW-NEXT: movabsq $811296384146066817, %rcx # imm = 0xB424DC35095CD81 -; HSW-NEXT: mulq %rcx -; HSW-NEXT: shrq $42, %rdx -; HSW-NEXT: imulq $281474977, %rdx, %rax # imm = 0x10C6F7A1 +; HSW-NEXT: movabsq $811296384146066817, %rax # imm = 0xB424DC35095CD81 +; HSW-NEXT: movq %rdi, %rdx +; HSW-NEXT: mulxq %rax, %rax, %rcx +; HSW-NEXT: shrq $42, %rcx +; HSW-NEXT: imulq $281474977, %rcx, %rax # imm = 0x10C6F7A1 ; HSW-NEXT: shrq $20, %rax ; HSW-NEXT: leal (%rax,%rax,4), %eax ; HSW-NEXT: addl $5, %eax @@ -22,11 +22,11 @@ ; ; ZN-LABEL: _Z15uint64_to_asciimPc: ; ZN: # %bb.0: # %bb -; ZN-NEXT: movq %rdi, %rax -; ZN-NEXT: movabsq $811296384146066817, %rcx # imm = 0xB424DC35095CD81 -; ZN-NEXT: mulq %rcx -; ZN-NEXT: shrq $42, %rdx -; ZN-NEXT: imulq $281474977, %rdx, %rax # imm = 0x10C6F7A1 +; ZN-NEXT: movabsq $811296384146066817, %rax # imm = 0xB424DC35095CD81 +; ZN-NEXT: movq %rdi, %rdx +; ZN-NEXT: mulxq %rax, %rax, %rcx +; ZN-NEXT: shrq $42, %rcx +; ZN-NEXT: imulq 
$281474977, %rcx, %rax # imm = 0x10C6F7A1 ; ZN-NEXT: shrq $20, %rax ; ZN-NEXT: leal 5(%rax,%rax,4), %eax ; ZN-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF