Index: lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- lib/Target/X86/X86ISelDAGToDAG.cpp +++ lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3410,14 +3410,11 @@ unsigned Opc, MOpc; bool isSigned = Opcode == ISD::SMUL_LOHI; - bool hasBMI2 = Subtarget->hasBMI2(); if (!isSigned) { switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); - case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r; - MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break; - case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r; - MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break; + case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; + case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; } } else { switch (NVT.SimpleTy) { @@ -3438,12 +3435,6 @@ case X86::MUL64r: SrcReg = LoReg = X86::RAX; HiReg = X86::RDX; break; - case X86::MULX32rr: - SrcReg = X86::EDX; LoReg = HiReg = 0; - break; - case X86::MULX64rr: - SrcReg = X86::RDX; LoReg = HiReg = 0; - break; } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; @@ -3457,26 +3448,15 @@ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg, N0, SDValue()).getValue(1); - SDValue ResHi, ResLo; - if (foldedLoad) { SDValue Chain; MachineSDNode *CNode = nullptr; SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; - if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) { - SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue); - CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); - ResHi = SDValue(CNode, 0); - ResLo = SDValue(CNode, 1); - Chain = SDValue(CNode, 2); - InFlag = SDValue(CNode, 3); - } else { - SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); - CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); - Chain = SDValue(CNode, 0); - InFlag = SDValue(CNode, 1); - } + SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); + CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); + Chain = SDValue(CNode, 0); + InFlag = 
SDValue(CNode, 1); // Update the chain. ReplaceUses(N1.getValue(1), Chain); @@ -3484,39 +3464,27 @@ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()}); } else { SDValue Ops[] = { N1, InFlag }; - if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) { - SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); - ResHi = SDValue(CNode, 0); - ResLo = SDValue(CNode, 1); - InFlag = SDValue(CNode, 2); - } else { - SDVTList VTs = CurDAG->getVTList(MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); - InFlag = SDValue(CNode, 0); - } + SDVTList VTs = CurDAG->getVTList(MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); + InFlag = SDValue(CNode, 0); } // Copy the low half of the result, if it is needed. if (!SDValue(Node, 0).use_empty()) { - if (!ResLo.getNode()) { - assert(LoReg && "Register for low half is not defined!"); - ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, - InFlag); - InFlag = ResLo.getValue(2); - } + assert(LoReg && "Register for low half is not defined!"); + SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, + NVT, InFlag); + InFlag = ResLo.getValue(2); ReplaceUses(SDValue(Node, 0), ResLo); LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the high half of the result, if it is needed. 
if (!SDValue(Node, 1).use_empty()) { - if (!ResHi.getNode()) { - assert(HiReg && "Register for high half is not defined!"); - ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT, - InFlag); - InFlag = ResHi.getValue(2); - } + assert(HiReg && "Register for high half is not defined!"); + SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, + NVT, InFlag); + InFlag = ResHi.getValue(2); ReplaceUses(SDValue(Node, 1), ResHi); LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); Index: test/CodeGen/X86/bmi2-x86_64.ll =================================================================== --- test/CodeGen/X86/bmi2-x86_64.ll +++ test/CodeGen/X86/bmi2-x86_64.ll @@ -68,8 +68,8 @@ ; CHECK-LABEL: mulx64: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdx, %rcx -; CHECK-NEXT: movq %rdi, %rdx -; CHECK-NEXT: mulxq %rsi, %rax, %rdx +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: mulq %rsi ; CHECK-NEXT: movq %rdx, (%rcx) ; CHECK-NEXT: retq %x1 = zext i64 %x to i128 @@ -86,8 +86,8 @@ ; CHECK-LABEL: mulx64_load: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdx, %rcx -; CHECK-NEXT: movq %rdi, %rdx -; CHECK-NEXT: mulxq (%rsi), %rax, %rdx +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: mulq (%rsi) ; CHECK-NEXT: movq %rdx, (%rcx) ; CHECK-NEXT: retq %y1 = load i64, i64* %y Index: test/CodeGen/X86/bmi2.ll =================================================================== --- test/CodeGen/X86/bmi2.ll +++ test/CodeGen/X86/bmi2.ll @@ -120,11 +120,11 @@ ; X86-LABEL: mulx32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: addl %edx, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: addl %eax, %eax -; X86-NEXT: mulxl %eax, %eax, %edx +; X86-NEXT: addl %edx, %edx +; X86-NEXT: mull %edx ; X86-NEXT: movl %edx, (%ecx) ; X86-NEXT: retl ; @@ -156,10 +156,10 @@ ; X86-LABEL: mulx32_load: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: 
movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: addl %edx, %edx -; X86-NEXT: mulxl (%eax), %eax, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: addl %eax, %eax +; X86-NEXT: mull (%edx) ; X86-NEXT: movl %edx, (%ecx) ; X86-NEXT: retl ; Index: test/CodeGen/X86/i128-mul.ll =================================================================== --- test/CodeGen/X86/i128-mul.ll +++ test/CodeGen/X86/i128-mul.ll @@ -7,108 +7,61 @@ ; PR1198 define i64 @foo(i64 %x, i64 %y) nounwind { -; X86-NOBMI-LABEL: foo: -; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: pushl %ebp -; X86-NOBMI-NEXT: pushl %ebx -; X86-NOBMI-NEXT: pushl %edi -; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NOBMI-NEXT: movl %ecx, %eax -; X86-NOBMI-NEXT: mull %ebp -; X86-NOBMI-NEXT: movl %edx, %ebx -; X86-NOBMI-NEXT: movl %esi, %eax -; X86-NOBMI-NEXT: mull %ebp -; X86-NOBMI-NEXT: movl %edx, %ebp -; X86-NOBMI-NEXT: movl %eax, %esi -; X86-NOBMI-NEXT: addl %ebx, %esi -; X86-NOBMI-NEXT: adcl $0, %ebp -; X86-NOBMI-NEXT: movl %ecx, %eax -; X86-NOBMI-NEXT: mull %edi -; X86-NOBMI-NEXT: movl %edx, %ebx -; X86-NOBMI-NEXT: addl %esi, %eax -; X86-NOBMI-NEXT: adcl %ebp, %ebx -; X86-NOBMI-NEXT: setb %al -; X86-NOBMI-NEXT: movzbl %al, %ecx -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: mull %edi -; X86-NOBMI-NEXT: movl %edx, %esi -; X86-NOBMI-NEXT: movl %eax, %ebp -; X86-NOBMI-NEXT: addl %ebx, %ebp -; X86-NOBMI-NEXT: adcl %ecx, %esi -; X86-NOBMI-NEXT: xorl %ecx, %ecx -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: mull %ecx -; X86-NOBMI-NEXT: movl %edx, %edi -; X86-NOBMI-NEXT: movl %eax, %ebx -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: mull %ecx -; X86-NOBMI-NEXT: addl %ebx, %eax -; X86-NOBMI-NEXT: adcl %edi, %edx -; X86-NOBMI-NEXT: addl %ebp, %eax -; 
X86-NOBMI-NEXT: adcl %esi, %edx -; X86-NOBMI-NEXT: popl %esi -; X86-NOBMI-NEXT: popl %edi -; X86-NOBMI-NEXT: popl %ebx -; X86-NOBMI-NEXT: popl %ebp -; X86-NOBMI-NEXT: retl +; X86-LABEL: foo: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %ebp +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: mull %ebp +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %eax, %esi +; X86-NEXT: addl %ebx, %esi +; X86-NEXT: adcl $0, %ebp +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: addl %esi, %eax +; X86-NEXT: adcl %ebp, %ebx +; X86-NEXT: setb %al +; X86-NEXT: movzbl %al, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: addl %ebx, %ebp +; X86-NEXT: adcl %ecx, %esi +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %ecx +; X86-NEXT: addl %ebx, %eax +; X86-NEXT: adcl %edi, %edx +; X86-NEXT: addl %ebp, %eax +; X86-NEXT: adcl %esi, %edx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl ; -; X86-BMI-LABEL: foo: -; X86-BMI: # %bb.0: -; X86-BMI-NEXT: pushl %ebp -; X86-BMI-NEXT: pushl %ebx -; X86-BMI-NEXT: pushl %edi -; X86-BMI-NEXT: pushl %esi -; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI-NEXT: movl %ecx, %edx -; X86-BMI-NEXT: mulxl %esi, %edx, %ebx -; X86-BMI-NEXT: movl %eax, %edx -; X86-BMI-NEXT: mulxl %esi, 
%ebp, %eax -; X86-BMI-NEXT: addl %ebx, %ebp -; X86-BMI-NEXT: adcl $0, %eax -; X86-BMI-NEXT: movl %ecx, %edx -; X86-BMI-NEXT: mulxl %edi, %edx, %ebx -; X86-BMI-NEXT: addl %ebp, %edx -; X86-BMI-NEXT: adcl %eax, %ebx -; X86-BMI-NEXT: setb %al -; X86-BMI-NEXT: movzbl %al, %eax -; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI-NEXT: mulxl %edi, %edi, %ebp -; X86-BMI-NEXT: addl %ebx, %edi -; X86-BMI-NEXT: adcl %eax, %ebp -; X86-BMI-NEXT: xorl %eax, %eax -; X86-BMI-NEXT: movl %esi, %edx -; X86-BMI-NEXT: mulxl %eax, %ebx, %esi -; X86-BMI-NEXT: movl %ecx, %edx -; X86-BMI-NEXT: mulxl %eax, %eax, %edx -; X86-BMI-NEXT: addl %ebx, %eax -; X86-BMI-NEXT: adcl %esi, %edx -; X86-BMI-NEXT: addl %edi, %eax -; X86-BMI-NEXT: adcl %ebp, %edx -; X86-BMI-NEXT: popl %esi -; X86-BMI-NEXT: popl %edi -; X86-BMI-NEXT: popl %ebx -; X86-BMI-NEXT: popl %ebp -; X86-BMI-NEXT: retl -; -; X64-NOBMI-LABEL: foo: -; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movq %rdi, %rax -; X64-NOBMI-NEXT: mulq %rsi -; X64-NOBMI-NEXT: movq %rdx, %rax -; X64-NOBMI-NEXT: retq -; -; X64-BMI-LABEL: foo: -; X64-BMI: # %bb.0: -; X64-BMI-NEXT: movq %rdi, %rdx -; X64-BMI-NEXT: mulxq %rsi, %rcx, %rax -; X64-BMI-NEXT: retq +; X64-LABEL: foo: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: retq %tmp0 = zext i64 %x to i128 %tmp1 = zext i64 %y to i128 %tmp2 = mul i128 %tmp0, %tmp1 @@ -122,236 +75,125 @@ ; zero-extended value. 
define i64 @mul1(i64 %n, i64* nocapture %z, i64* nocapture %x, i64 %y) nounwind { -; X86-NOBMI-LABEL: mul1: -; X86-NOBMI: # %bb.0: # %entry -; X86-NOBMI-NEXT: pushl %ebp -; X86-NOBMI-NEXT: pushl %ebx -; X86-NOBMI-NEXT: pushl %edi -; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: subl $28, %esp -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: orl %ecx, %eax -; X86-NOBMI-NEXT: je .LBB1_3 -; X86-NOBMI-NEXT: # %bb.1: # %for.body.preheader -; X86-NOBMI-NEXT: xorl %eax, %eax -; X86-NOBMI-NEXT: xorl %edx, %edx -; X86-NOBMI-NEXT: xorl %ebx, %ebx -; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X86-NOBMI-NEXT: .p2align 4, 0x90 -; X86-NOBMI-NEXT: .LBB1_2: # %for.body -; X86-NOBMI-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X86-NOBMI-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: movl %eax, %ecx -; X86-NOBMI-NEXT: movl (%eax,%ebx,8), %ebp -; X86-NOBMI-NEXT: movl 4(%eax,%ebx,8), %esi -; X86-NOBMI-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X86-NOBMI-NEXT: movl %ebp, %eax -; X86-NOBMI-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: mull %ecx -; X86-NOBMI-NEXT: movl %edx, %edi -; X86-NOBMI-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X86-NOBMI-NEXT: movl %esi, %eax -; X86-NOBMI-NEXT: mull %ecx -; X86-NOBMI-NEXT: movl %edx, %ecx -; X86-NOBMI-NEXT: movl %eax, %esi -; X86-NOBMI-NEXT: addl %edi, %esi -; X86-NOBMI-NEXT: adcl $0, %ecx -; X86-NOBMI-NEXT: movl %ebp, %eax -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: mull %edx -; X86-NOBMI-NEXT: movl %edx, %ebp -; X86-NOBMI-NEXT: movl %eax, %edi -; X86-NOBMI-NEXT: addl %esi, %edi -; X86-NOBMI-NEXT: adcl %ecx, %ebp -; X86-NOBMI-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax # 
4-byte Reload -; X86-NOBMI-NEXT: mull {{[0-9]+}}(%esp) -; X86-NOBMI-NEXT: movl %edx, %ecx -; X86-NOBMI-NEXT: movl %eax, %esi -; X86-NOBMI-NEXT: addl %ebp, %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload -; X86-NOBMI-NEXT: adcl %eax, %ecx -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: xorl %edx, %edx -; X86-NOBMI-NEXT: mull %edx -; X86-NOBMI-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X86-NOBMI-NEXT: movl %eax, %ebp -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X86-NOBMI-NEXT: xorl %edx, %edx -; X86-NOBMI-NEXT: mull %edx -; X86-NOBMI-NEXT: addl %ebp, %eax -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NOBMI-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X86-NOBMI-NEXT: addl %esi, %eax -; X86-NOBMI-NEXT: adcl %ecx, %edx -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X86-NOBMI-NEXT: addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X86-NOBMI-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X86-NOBMI-NEXT: adcl $0, %eax -; X86-NOBMI-NEXT: adcl $0, %edx -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: movl %esi, (%ecx,%ebx,8) -; X86-NOBMI-NEXT: movl %edi, 4(%ecx,%ebx,8) -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: movl %ecx, %edi -; X86-NOBMI-NEXT: addl $1, %ebx -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X86-NOBMI-NEXT: adcl $0, %esi -; X86-NOBMI-NEXT: movl %ebx, %ecx -; X86-NOBMI-NEXT: xorl %ebp, %ecx -; X86-NOBMI-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X86-NOBMI-NEXT: xorl %edi, %esi -; X86-NOBMI-NEXT: orl %ecx, %esi -; X86-NOBMI-NEXT: jne .LBB1_2 -; X86-NOBMI-NEXT: .LBB1_3: # %for.end -; X86-NOBMI-NEXT: xorl %eax, %eax -; X86-NOBMI-NEXT: xorl %edx, %edx -; X86-NOBMI-NEXT: addl $28, %esp -; X86-NOBMI-NEXT: popl %esi -; X86-NOBMI-NEXT: popl %edi -; X86-NOBMI-NEXT: popl %ebx -; X86-NOBMI-NEXT: popl %ebp -; X86-NOBMI-NEXT: retl -; -; X86-BMI-LABEL: mul1: -; X86-BMI: # 
%bb.0: # %entry -; X86-BMI-NEXT: pushl %ebp -; X86-BMI-NEXT: pushl %ebx -; X86-BMI-NEXT: pushl %edi -; X86-BMI-NEXT: pushl %esi -; X86-BMI-NEXT: subl $20, %esp -; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI-NEXT: orl %ecx, %eax -; X86-BMI-NEXT: je .LBB1_3 -; X86-BMI-NEXT: # %bb.1: # %for.body.preheader -; X86-BMI-NEXT: xorl %ecx, %ecx -; X86-BMI-NEXT: xorl %edx, %edx -; X86-BMI-NEXT: xorl %edi, %edi -; X86-BMI-NEXT: movl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X86-BMI-NEXT: .p2align 4, 0x90 -; X86-BMI-NEXT: .LBB1_2: # %for.body -; X86-BMI-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; X86-BMI-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI-NEXT: movl (%eax,%edi,8), %ecx -; X86-BMI-NEXT: movl 4(%eax,%edi,8), %ebx -; X86-BMI-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X86-BMI-NEXT: movl %ecx, %edx -; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI-NEXT: movl %eax, %esi -; X86-BMI-NEXT: mulxl %eax, %eax, %ebp -; X86-BMI-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X86-BMI-NEXT: movl %ebx, %edx -; X86-BMI-NEXT: mulxl %esi, %eax, %esi -; X86-BMI-NEXT: addl %ebp, %eax -; X86-BMI-NEXT: adcl $0, %esi -; X86-BMI-NEXT: movl %ecx, %edx -; X86-BMI-NEXT: mulxl {{[0-9]+}}(%esp), %ebp, %ebx -; X86-BMI-NEXT: addl %eax, %ebp -; X86-BMI-NEXT: adcl %esi, %ebx -; X86-BMI-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-BMI-NEXT: mulxl {{[0-9]+}}(%esp), %eax, %esi -; X86-BMI-NEXT: setb %dl -; X86-BMI-NEXT: addl %ebx, %eax -; X86-BMI-NEXT: movzbl %dl, %edx -; X86-BMI-NEXT: adcl %edx, %esi -; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI-NEXT: xorl %ebx, %ebx -; X86-BMI-NEXT: mulxl %ebx, %ebx, %edx -; X86-BMI-NEXT: movl %edx, (%esp) # 4-byte Spill -; X86-BMI-NEXT: movl %ecx, %edx -; X86-BMI-NEXT: xorl %ecx, %ecx -; X86-BMI-NEXT: mulxl %ecx, %ecx, %edx -; X86-BMI-NEXT: addl %ebx, %ecx -; X86-BMI-NEXT: 
movl {{[0-9]+}}(%esp), %ebx -; X86-BMI-NEXT: adcl (%esp), %edx # 4-byte Folded Reload -; X86-BMI-NEXT: addl %eax, %ecx -; X86-BMI-NEXT: adcl %esi, %edx -; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X86-BMI-NEXT: addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X86-BMI-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X86-BMI-NEXT: adcl $0, %ecx -; X86-BMI-NEXT: adcl $0, %edx -; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI-NEXT: movl %esi, (%eax,%edi,8) -; X86-BMI-NEXT: movl %ebp, 4(%eax,%edi,8) -; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI-NEXT: movl %eax, %esi -; X86-BMI-NEXT: addl $1, %edi -; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload -; X86-BMI-NEXT: adcl $0, %ebp -; X86-BMI-NEXT: movl %edi, %eax -; X86-BMI-NEXT: xorl %esi, %eax -; X86-BMI-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill -; X86-BMI-NEXT: movl %ebp, %esi -; X86-BMI-NEXT: xorl %ebx, %esi -; X86-BMI-NEXT: orl %eax, %esi -; X86-BMI-NEXT: jne .LBB1_2 -; X86-BMI-NEXT: .LBB1_3: # %for.end -; X86-BMI-NEXT: xorl %eax, %eax -; X86-BMI-NEXT: xorl %edx, %edx -; X86-BMI-NEXT: addl $20, %esp -; X86-BMI-NEXT: popl %esi -; X86-BMI-NEXT: popl %edi -; X86-BMI-NEXT: popl %ebx -; X86-BMI-NEXT: popl %ebp -; X86-BMI-NEXT: retl -; -; X64-NOBMI-LABEL: mul1: -; X64-NOBMI: # %bb.0: # %entry -; X64-NOBMI-NEXT: testq %rdi, %rdi -; X64-NOBMI-NEXT: je .LBB1_3 -; X64-NOBMI-NEXT: # %bb.1: # %for.body.preheader -; X64-NOBMI-NEXT: movq %rcx, %r8 -; X64-NOBMI-NEXT: movq %rdx, %r9 -; X64-NOBMI-NEXT: xorl %r10d, %r10d -; X64-NOBMI-NEXT: xorl %ecx, %ecx -; X64-NOBMI-NEXT: .p2align 4, 0x90 -; X64-NOBMI-NEXT: .LBB1_2: # %for.body -; X64-NOBMI-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI-NEXT: movq %r8, %rax -; X64-NOBMI-NEXT: mulq (%r9,%rcx,8) -; X64-NOBMI-NEXT: addq %r10, %rax -; X64-NOBMI-NEXT: adcq $0, %rdx -; X64-NOBMI-NEXT: movq %rax, (%rsi,%rcx,8) -; X64-NOBMI-NEXT: incq %rcx -; X64-NOBMI-NEXT: cmpq %rcx, %rdi -; X64-NOBMI-NEXT: movq %rdx, %r10 -; 
X64-NOBMI-NEXT: jne .LBB1_2 -; X64-NOBMI-NEXT: .LBB1_3: # %for.end -; X64-NOBMI-NEXT: xorl %eax, %eax -; X64-NOBMI-NEXT: retq +; X86-LABEL: mul1: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $28, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: je .LBB1_3 +; X86-NEXT: # %bb.1: # %for.body.preheader +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB1_2: # %for.body +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movl (%eax,%ebx,8), %ebp +; X86-NEXT: movl 4(%eax,%ebx,8), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, %eax +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: addl %edi, %esi +; X86-NEXT: adcl $0, %ecx +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: mull %edx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %eax, %edi +; X86-NEXT: addl %esi, %edi +; X86-NEXT: adcl %ecx, %ebp +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: addl %ebp, %esi +; X86-NEXT: movzbl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-NEXT: adcl %eax, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: mull %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: mull %edx +; X86-NEXT: addl %ebp, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: addl %esi, %eax +; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: adcl $0, %eax +; X86-NEXT: adcl $0, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %esi, (%ecx,%ebx,8) +; X86-NEXT: movl %edi, 4(%ecx,%ebx,8) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: addl $1, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: adcl $0, %esi +; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: xorl %ebp, %ecx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: xorl %edi, %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: jne .LBB1_2 +; X86-NEXT: .LBB1_3: # %for.end +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: addl $28, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl ; -; X64-BMI-LABEL: mul1: -; X64-BMI: # %bb.0: # %entry -; X64-BMI-NEXT: testq %rdi, %rdi -; X64-BMI-NEXT: je .LBB1_3 -; X64-BMI-NEXT: # %bb.1: # %for.body.preheader -; X64-BMI-NEXT: movq %rcx, %r8 -; X64-BMI-NEXT: movq %rdx, %r9 -; X64-BMI-NEXT: xorl %r10d, %r10d -; X64-BMI-NEXT: xorl %ecx, %ecx -; X64-BMI-NEXT: .p2align 4, 0x90 -; X64-BMI-NEXT: .LBB1_2: # %for.body -; X64-BMI-NEXT: # =>This Inner Loop 
Header: Depth=1 -; X64-BMI-NEXT: movq %r8, %rdx -; X64-BMI-NEXT: mulxq (%r9,%rcx,8), %rax, %rdx -; X64-BMI-NEXT: addq %r10, %rax -; X64-BMI-NEXT: adcq $0, %rdx -; X64-BMI-NEXT: movq %rax, (%rsi,%rcx,8) -; X64-BMI-NEXT: incq %rcx -; X64-BMI-NEXT: cmpq %rcx, %rdi -; X64-BMI-NEXT: movq %rdx, %r10 -; X64-BMI-NEXT: jne .LBB1_2 -; X64-BMI-NEXT: .LBB1_3: # %for.end -; X64-BMI-NEXT: xorl %eax, %eax -; X64-BMI-NEXT: retq +; X64-LABEL: mul1: +; X64: # %bb.0: # %entry +; X64-NEXT: testq %rdi, %rdi +; X64-NEXT: je .LBB1_3 +; X64-NEXT: # %bb.1: # %for.body.preheader +; X64-NEXT: movq %rcx, %r8 +; X64-NEXT: movq %rdx, %r9 +; X64-NEXT: xorl %r10d, %r10d +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB1_2: # %for.body +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movq %r8, %rax +; X64-NEXT: mulq (%r9,%rcx,8) +; X64-NEXT: addq %r10, %rax +; X64-NEXT: adcq $0, %rdx +; X64-NEXT: movq %rax, (%rsi,%rcx,8) +; X64-NEXT: incq %rcx +; X64-NEXT: cmpq %rcx, %rdi +; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: jne .LBB1_2 +; X64-NEXT: .LBB1_3: # %for.end +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: retq entry: %conv = zext i64 %y to i128 %cmp11 = icmp eq i64 %n, 0 Index: test/CodeGen/X86/mulx32.ll =================================================================== --- test/CodeGen/X86/mulx32.ll +++ test/CodeGen/X86/mulx32.ll @@ -5,8 +5,8 @@ define i64 @f1(i32 %a, i32 %b) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: mulxl {{[0-9]+}}(%esp), %eax, %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: mull {{[0-9]+}}(%esp) ; CHECK-NEXT: retl %x = zext i32 %a to i64 %y = zext i32 %b to i64 @@ -17,9 +17,9 @@ define i64 @f2(i32 %a, i32* %p) { ; CHECK-LABEL: f2: ; CHECK: # %bb.0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: mulxl (%eax), %eax, %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: mull (%ecx) ; CHECK-NEXT: retl %b = load i32, 
i32* %p %x = zext i32 %a to i64 Index: test/CodeGen/X86/mulx64.ll =================================================================== --- test/CodeGen/X86/mulx64.ll +++ test/CodeGen/X86/mulx64.ll @@ -5,8 +5,8 @@ define i128 @f1(i64 %a, i64 %b) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rdx -; CHECK-NEXT: mulxq %rsi, %rax, %rdx +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: mulq %rsi ; CHECK-NEXT: retq %x = zext i64 %a to i128 %y = zext i64 %b to i128 @@ -17,8 +17,8 @@ define i128 @f2(i64 %a, i64* %p) { ; CHECK-LABEL: f2: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rdx -; CHECK-NEXT: mulxq (%rsi), %rax, %rdx +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: mulq (%rsi) ; CHECK-NEXT: retq %b = load i64, i64* %p %x = zext i64 %a to i128 Index: test/CodeGen/X86/pr35636.ll =================================================================== --- test/CodeGen/X86/pr35636.ll +++ test/CodeGen/X86/pr35636.ll @@ -5,11 +5,11 @@ define void @_Z15uint64_to_asciimPc(i64 %arg) { ; HSW-LABEL: _Z15uint64_to_asciimPc: ; HSW: # %bb.0: # %bb -; HSW-NEXT: movabsq $811296384146066817, %rax # imm = 0xB424DC35095CD81 -; HSW-NEXT: movq %rdi, %rdx -; HSW-NEXT: mulxq %rax, %rax, %rcx -; HSW-NEXT: shrq $42, %rcx -; HSW-NEXT: imulq $281474977, %rcx, %rax # imm = 0x10C6F7A1 +; HSW-NEXT: movq %rdi, %rax +; HSW-NEXT: movabsq $811296384146066817, %rcx # imm = 0xB424DC35095CD81 +; HSW-NEXT: mulq %rcx +; HSW-NEXT: shrq $42, %rdx +; HSW-NEXT: imulq $281474977, %rdx, %rax # imm = 0x10C6F7A1 ; HSW-NEXT: shrq $20, %rax ; HSW-NEXT: leal (%rax,%rax,4), %eax ; HSW-NEXT: addl $5, %eax @@ -22,11 +22,11 @@ ; ; ZN-LABEL: _Z15uint64_to_asciimPc: ; ZN: # %bb.0: # %bb -; ZN-NEXT: movabsq $811296384146066817, %rax # imm = 0xB424DC35095CD81 -; ZN-NEXT: movq %rdi, %rdx -; ZN-NEXT: mulxq %rax, %rax, %rcx -; ZN-NEXT: shrq $42, %rcx -; ZN-NEXT: imulq $281474977, %rcx, %rax # imm = 0x10C6F7A1 +; ZN-NEXT: movq %rdi, %rax +; ZN-NEXT: movabsq $811296384146066817, %rcx # imm = 0xB424DC35095CD81 +; ZN-NEXT: 
mulq %rcx +; ZN-NEXT: shrq $42, %rdx +; ZN-NEXT: imulq $281474977, %rdx, %rax # imm = 0x10C6F7A1 ; ZN-NEXT: shrq $20, %rax ; ZN-NEXT: leal 5(%rax,%rax,4), %eax ; ZN-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF Index: test/CodeGen/X86/stack-folding-bmi2.ll =================================================================== --- test/CodeGen/X86/stack-folding-bmi2.ll +++ test/CodeGen/X86/stack-folding-bmi2.ll @@ -28,7 +28,7 @@ define i64 @stack_fold_mulx_u64(i64 %a0, i64 %a1, i64 *%a2) { ;CHECK-LABEL: stack_fold_mulx_u64 - ;CHECK: mulxq {{-?[0-9]*}}(%rsp), %rax, %rcx {{.*#+}} 8-byte Folded Reload + ;CHECK: mulq {{-?[0-9]*}}(%rsp) {{.*#+}} 8-byte Folded Reload %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() %2 = zext i64 %a0 to i128 %3 = zext i64 %a1 to i128