diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5280,7 +5280,8 @@
   // If the type is twice as wide is legal, transform the mulhs to a wider
   // multiply plus a shift.
-  if (VT.isSimple() && !VT.isVector()) {
+  if (VT.isSimple() && !VT.isVector() &&
+      !TLI.isOperationLegal(ISD::SMUL_LOHI, VT)) {
     MVT Simple = VT.getSimpleVT();
     unsigned SimpleSize = Simple.getSizeInBits();
     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
@@ -5330,7 +5331,8 @@
   // If the type is twice as wide is legal, transform the mulhu to a wider
   // multiply plus a shift.
-  if (VT.isSimple() && !VT.isVector()) {
+  if (VT.isSimple() && !VT.isVector() &&
+      !TLI.isOperationLegal(ISD::UMUL_LOHI, VT)) {
     MVT Simple = VT.getSimpleVT();
     unsigned SimpleSize = Simple.getSizeInBits();
     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5255,6 +5255,18 @@
     bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty();
     switch (NVT.SimpleTy) {
     default: llvm_unreachable("Unsupported VT!");
+    case MVT::i8:
+      Opc = IsSigned ? X86::IMUL8r : X86::MUL8r;
+      MOpc = IsSigned ? X86::IMUL8m : X86::MUL8m;
+      // 8-bit multiplies write the whole product to AX, so the high half
+      // lives in AH rather than in a separate register.
+      LoReg = X86::AL;
+      HiReg = X86::AH;
+      break;
+    case MVT::i16:
+      Opc = IsSigned ? X86::IMUL16r : X86::MUL16r;
+      MOpc = IsSigned ? X86::IMUL16m : X86::MUL16m;
+      LoReg = X86::AX;
+      HiReg = X86::DX;
+      break;
    case MVT::i32:
      Opc = UseMULXHi ? X86::MULX32Hrr :
            UseMULX ? X86::MULX32rr :
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53382,7 +53382,46 @@
                          PMADDBuilder);
 }
 
+// Attempt to match a MULX pattern: a 64-bit multiply of two zero-extended
+// i32 values from which both the high and the low 32-bit halves of the
+// product are extracted.
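+//
+// For illustration, a sketch of the kind of IR this combine should catch
+// (modeled on the mulx32 test in bmi2.ll below; the value names here are
+// invented for exposition):
+//   %za = zext i32 %a to i64
+//   %zb = zext i32 %b to i64
+//   %m  = mul i64 %za, %zb
+//   %s  = lshr i64 %m, 32
+//   %hi = trunc i64 %s to i32    ; high half -> result 1 of the UMUL_LOHI
+//   %lo = trunc i64 %m to i32    ; low half  -> result 0 of the UMUL_LOHI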
+//
+// In SelectionDAG form the high-part computation looks like this:
+//   (i32 (trunc (srl (mul (zext (i32 A)), (zext (i32 B))), 32)))
+static SDValue detectMULX(SDValue In, EVT VT, SelectionDAG &DAG,
+                          TargetLowering::DAGCombinerInfo &DCI,
+                          const X86Subtarget &Subtarget, const SDLoc &DL) {
+  if (VT != MVT::i32 || !Subtarget.hasBMI2() || In.getOpcode() != ISD::SRL)
+    return SDValue();
+  SDValue Op0 = In.getOperand(0);
+  auto *C = dyn_cast<ConstantSDNode>(In.getOperand(1));
+  if (!C || C->getZExtValue() != 32 || Op0.getOpcode() != ISD::MUL)
+    return SDValue();
+  SDValue Op00 = Op0.getOperand(0);
+  SDValue Op01 = Op0.getOperand(1);
+  if (Op00.getOpcode() != ISD::ZERO_EXTEND ||
+      Op00.getOperand(0).getValueType() != MVT::i32 ||
+      Op01.getOpcode() != ISD::ZERO_EXTEND ||
+      Op01.getOperand(0).getValueType() != MVT::i32)
+    return SDValue();
+  // Every other user of the i64 multiply must be a truncate back to i32
+  // (the low half); collect them so they can be rewritten to the first
+  // result of the UMUL_LOHI node.
+  SmallVector<SDValue, 2> UserL;
+  for (SDNode *User : Op0->uses()) {
+    if (User == In.getNode())
+      continue;
+    if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != VT)
+      return SDValue();
+    UserL.push_back(SDValue(User, 0));
+  }
+  SDValue Lo = DAG.getNode(ISD::UMUL_LOHI, DL, DAG.getVTList(VT, VT),
+                           Op00.getOperand(0), Op01.getOperand(0));
+  for (SDValue U : UserL) {
+    DAG.ReplaceAllUsesOfValueWith(U, Lo);
+    DCI.recursivelyDeleteUnusedNodes(U.getNode());
+  }
+  return SDValue(Lo.getNode(), 1);
+}
+
 static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
+                               TargetLowering::DAGCombinerInfo &DCI,
                                const X86Subtarget &Subtarget) {
   EVT VT = N->getValueType(0);
   SDValue Src = N->getOperand(0);
@@ -53400,6 +53439,11 @@
   if (SDValue PMAdd = detectPMADDUBSW(Src, VT, DAG, Subtarget, DL))
     return PMAdd;
+  // Try to detect the MULX pattern.
+  if (SDValue MulX = detectMULX(Src, VT, DAG, DCI, Subtarget, DL)) {
+    return MulX;
+  }
+
   // Try to combine truncation with signed/unsigned saturation.
if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget)) return Val; @@ -58023,7 +58067,7 @@ case X86ISD::VFCMULC: case X86ISD::VFMULC: return combineFMulcFCMulc(N, DAG, Subtarget); case ISD::FNEG: return combineFneg(N, DAG, DCI, Subtarget); - case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget); + case ISD::TRUNCATE: return combineTruncate(N, DAG, DCI, Subtarget); case X86ISD::VTRUNC: return combineVTRUNC(N, DAG, DCI); case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget); case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget); diff --git a/llvm/test/CodeGen/X86/bmi2.ll b/llvm/test/CodeGen/X86/bmi2.ll --- a/llvm/test/CodeGen/X86/bmi2.ll +++ b/llvm/test/CodeGen/X86/bmi2.ll @@ -305,15 +305,12 @@ ; ; X64-LABEL: mulx32: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: addl %edi, %edi -; X64-NEXT: leal (%rsi,%rsi), %eax -; X64-NEXT: imulq %rdi, %rax -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: shrq $32, %rcx -; X64-NEXT: movl %ecx, (%rdx) -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: leal (%rdi,%rdi), %edx +; X64-NEXT: addl %esi, %esi +; X64-NEXT: mulxl %esi, %eax, %edx +; X64-NEXT: movl %edx, (%rcx) ; X64-NEXT: retq %x1 = add i32 %x, %x %y1 = add i32 %y, %y @@ -340,14 +337,11 @@ ; ; X64-LABEL: mulx32_load: ; X64: # %bb.0: +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: leal (%rdi,%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx -; X64-NEXT: imulq %rcx, %rax -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: shrq $32, %rcx -; X64-NEXT: movl %ecx, (%rdx) -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: leal (%rdi,%rdi), %edx +; X64-NEXT: mulxl (%rsi), %eax, %edx +; X64-NEXT: movl %edx, (%rcx) ; X64-NEXT: retq %x1 = add i32 %x, %x %y1 = load i32, ptr %y diff --git a/llvm/test/CodeGen/X86/smul_fix.ll b/llvm/test/CodeGen/X86/smul_fix.ll --- a/llvm/test/CodeGen/X86/smul_fix.ll +++ b/llvm/test/CodeGen/X86/smul_fix.ll @@ -10,13 +10,9 @@ define i32 @func(i32 %x, i32 %y) nounwind { ; X64-LABEL: func: ; X64: # %bb.0: -; X64-NEXT: movslq %esi, %rax -; X64-NEXT: movslq %edi, %rcx -; X64-NEXT: imulq %rax, %rcx -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: shrq $32, %rax -; X64-NEXT: shldl $30, %ecx, %eax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: imull %esi +; X64-NEXT: shrdl $2, %edx, %eax ; X64-NEXT: retq ; ; X86-LABEL: func: @@ -97,19 +93,17 @@ define i4 @func3(i4 %x, i4 %y) nounwind { ; X64-LABEL: func3: ; X64: # %bb.0: -; X64-NEXT: shlb $4, %dil -; X64-NEXT: sarb $4, %dil +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shlb $4, %al +; X64-NEXT: sarb $4, %al ; X64-NEXT: shlb $4, %sil ; X64-NEXT: sarb $4, %sil -; X64-NEXT: movsbl %sil, %ecx -; X64-NEXT: movsbl %dil, %eax -; X64-NEXT: imull %ecx, %eax -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: shrb $2, %cl -; X64-NEXT: shrl $8, %eax -; X64-NEXT: shlb $6, %al -; X64-NEXT: orb %cl, %al ; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: imulb %sil +; X64-NEXT: movb %ah, %cl +; X64-NEXT: shrb $2, %al +; X64-NEXT: shlb $6, %cl +; X64-NEXT: orb %cl, %al ; X64-NEXT: retq ; ; X86-LABEL: func3: @@ -120,13 +114,10 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: shlb $4, %cl ; X86-NEXT: sarb $4, %cl -; X86-NEXT: movsbl %cl, %ecx -; X86-NEXT: movsbl %al, %eax -; X86-NEXT: imull %ecx, %eax -; X86-NEXT: shlb $6, %ah +; X86-NEXT: imulb %cl ; X86-NEXT: shrb $2, %al +; X86-NEXT: shlb $6, %ah ; 
X86-NEXT: orb %ah, %al -; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl %tmp = call i4 @llvm.smul.fix.i4(i4 %x, i4 %y, i32 2) ret i4 %tmp diff --git a/llvm/test/CodeGen/X86/smul_fix_sat.ll b/llvm/test/CodeGen/X86/smul_fix_sat.ll --- a/llvm/test/CodeGen/X86/smul_fix_sat.ll +++ b/llvm/test/CodeGen/X86/smul_fix_sat.ll @@ -10,18 +10,15 @@ define i32 @func(i32 %x, i32 %y) nounwind { ; X64-LABEL: func: ; X64: # %bb.0: -; X64-NEXT: movslq %esi, %rax -; X64-NEXT: movslq %edi, %rcx -; X64-NEXT: imulq %rax, %rcx -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: shrq $32, %rax -; X64-NEXT: shrdl $2, %eax, %ecx -; X64-NEXT: cmpl $2, %eax -; X64-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF -; X64-NEXT: cmovll %ecx, %edx -; X64-NEXT: cmpl $-2, %eax -; X64-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X64-NEXT: cmovgel %edx, %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: imull %esi +; X64-NEXT: shrdl $2, %edx, %eax +; X64-NEXT: cmpl $2, %edx +; X64-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-NEXT: cmovgel %ecx, %eax +; X64-NEXT: cmpl $-2, %edx +; X64-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 +; X64-NEXT: cmovll %ecx, %eax ; X64-NEXT: retq ; ; X86-LABEL: func: @@ -139,16 +136,15 @@ define i4 @func3(i4 %x, i4 %y) nounwind { ; X64-LABEL: func3: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: shlb $4, %sil ; X64-NEXT: sarb $4, %sil -; X64-NEXT: shlb $4, %dil -; X64-NEXT: movsbl %dil, %eax -; X64-NEXT: movsbl %sil, %ecx -; X64-NEXT: imull %eax, %ecx -; X64-NEXT: movl %ecx, %eax +; X64-NEXT: shlb $4, %al +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: imulb %sil +; X64-NEXT: movb %ah, %cl ; X64-NEXT: shrb $2, %al -; X64-NEXT: shrl $8, %ecx -; X64-NEXT: movl %ecx, %edx +; X64-NEXT: movb %ah, %dl ; X64-NEXT: shlb $6, %dl ; X64-NEXT: orb %al, %dl ; X64-NEXT: movzbl %dl, %eax @@ -164,19 +160,17 @@ ; ; X86-LABEL: func3: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shlb $4, %al -; X86-NEXT: sarb $4, %al ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: shlb $4, %cl -; X86-NEXT: movsbl %cl, %ecx -; X86-NEXT: movsbl %al, %eax -; X86-NEXT: imull %ecx, %eax +; X86-NEXT: sarb $4, %cl +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shlb $4, %al +; X86-NEXT: imulb %cl +; X86-NEXT: shrb $2, %al ; X86-NEXT: movb %ah, %cl ; X86-NEXT: shlb $6, %cl -; X86-NEXT: shrb $2, %al -; X86-NEXT: orb %cl, %al -; X86-NEXT: movzbl %al, %ecx +; X86-NEXT: orb %al, %cl +; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: cmpb $2, %ah ; X86-NEXT: movl $127, %edx ; X86-NEXT: cmovll %ecx, %edx @@ -193,67 +187,51 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-LABEL: vec: ; X64: # %bb.0: -; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X64-NEXT: movd %xmm2, %eax -; X64-NEXT: cltq ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X64-NEXT: movd %xmm2, %eax +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] ; X64-NEXT: movd %xmm2, %ecx -; X64-NEXT: movslq %ecx, %rdx -; X64-NEXT: imulq %rax, %rdx -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: shrq $32, %rcx -; X64-NEXT: shrdl $2, %ecx, %edx -; X64-NEXT: cmpl $2, %ecx -; X64-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X64-NEXT: cmovgel %eax, %edx -; X64-NEXT: cmpl $-2, %ecx -; X64-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X64-NEXT: cmovll %ecx, %edx -; X64-NEXT: movd %edx, %xmm2 +; X64-NEXT: imull %ecx +; X64-NEXT: shrdl $2, %edx, %eax +; X64-NEXT: cmpl $2, %edx +; X64-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-NEXT: cmovgel %ecx, %eax +; X64-NEXT: cmpl $-2, 
%edx +; X64-NEXT: movl $-2147483648, %esi # imm = 0x80000000 +; X64-NEXT: cmovll %esi, %eax +; X64-NEXT: movd %eax, %xmm2 +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X64-NEXT: movd %xmm3, %eax ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] ; X64-NEXT: movd %xmm3, %edx -; X64-NEXT: movslq %edx, %rdx -; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] -; X64-NEXT: movd %xmm3, %esi -; X64-NEXT: movslq %esi, %rsi -; X64-NEXT: imulq %rdx, %rsi -; X64-NEXT: movq %rsi, %rdx -; X64-NEXT: shrq $32, %rdx -; X64-NEXT: shrdl $2, %edx, %esi +; X64-NEXT: imull %edx +; X64-NEXT: shrdl $2, %edx, %eax ; X64-NEXT: cmpl $2, %edx -; X64-NEXT: cmovgel %eax, %esi +; X64-NEXT: cmovgel %ecx, %eax ; X64-NEXT: cmpl $-2, %edx -; X64-NEXT: cmovll %ecx, %esi -; X64-NEXT: movd %esi, %xmm3 +; X64-NEXT: cmovll %esi, %eax +; X64-NEXT: movd %eax, %xmm3 ; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; X64-NEXT: movd %xmm0, %eax ; X64-NEXT: movd %xmm1, %edx -; X64-NEXT: movslq %edx, %rdx -; X64-NEXT: movd %xmm0, %esi -; X64-NEXT: movslq %esi, %rsi -; X64-NEXT: imulq %rdx, %rsi -; X64-NEXT: movq %rsi, %rdx -; X64-NEXT: shrq $32, %rdx -; X64-NEXT: shrdl $2, %edx, %esi +; X64-NEXT: imull %edx +; X64-NEXT: shrdl $2, %edx, %eax ; X64-NEXT: cmpl $2, %edx -; X64-NEXT: cmovgel %eax, %esi +; X64-NEXT: cmovgel %ecx, %eax ; X64-NEXT: cmpl $-2, %edx -; X64-NEXT: cmovll %ecx, %esi -; X64-NEXT: movd %esi, %xmm2 -; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1] -; X64-NEXT: movd %xmm1, %edx -; X64-NEXT: movslq %edx, %rdx +; X64-NEXT: cmovll %esi, %eax +; X64-NEXT: movd %eax, %xmm2 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] -; X64-NEXT: movd %xmm0, %esi -; X64-NEXT: movslq %esi, %rsi -; X64-NEXT: imulq %rdx, %rsi -; X64-NEXT: movq %rsi, %rdx -; X64-NEXT: shrq $32, %rdx -; X64-NEXT: shrdl $2, %edx, %esi +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] +; X64-NEXT: movd %xmm0, %edx +; X64-NEXT: imull %edx +; X64-NEXT: shrdl $2, %edx, %eax ; X64-NEXT: cmpl $2, %edx -; X64-NEXT: cmovgel %eax, %esi +; X64-NEXT: cmovgel %ecx, %eax ; X64-NEXT: cmpl $-2, %edx -; X64-NEXT: cmovll %ecx, %esi -; X64-NEXT: movd %esi, %xmm0 +; X64-NEXT: cmovll %esi, %eax +; X64-NEXT: movd %eax, %xmm0 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; X64-NEXT: movdqa %xmm2, %xmm0 diff --git a/llvm/test/CodeGen/X86/umul_fix.ll b/llvm/test/CodeGen/X86/umul_fix.ll --- a/llvm/test/CodeGen/X86/umul_fix.ll +++ b/llvm/test/CodeGen/X86/umul_fix.ll @@ -10,13 +10,9 @@ define i32 @func(i32 %x, i32 %y) nounwind { ; X64-LABEL: func: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %eax -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: imulq %rax, %rcx -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: shrq $32, %rax -; X64-NEXT: shldl $30, %ecx, %eax -; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: mull %esi +; X64-NEXT: shrdl $2, %edx, %eax ; X64-NEXT: retq ; ; X86-LABEL: func: diff --git a/llvm/test/CodeGen/X86/umul_fix_sat.ll b/llvm/test/CodeGen/X86/umul_fix_sat.ll --- a/llvm/test/CodeGen/X86/umul_fix_sat.ll +++ b/llvm/test/CodeGen/X86/umul_fix_sat.ll @@ -10,15 +10,12 @@ define i32 @func(i32 %x, i32 %y) nounwind { ; X64-LABEL: func: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %eax -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: imulq %rax, %rcx -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: shrq $32, %rax -; X64-NEXT: shrdl $2, %eax, %ecx -; X64-NEXT: cmpl $4, %eax -; X64-NEXT: movl $-1, %eax -; X64-NEXT: cmovbl %ecx, %eax +; 
X64-NEXT: movl %edi, %eax +; X64-NEXT: mull %esi +; X64-NEXT: shrdl $2, %edx, %eax +; X64-NEXT: cmpl $4, %edx +; X64-NEXT: movl $-1, %ecx +; X64-NEXT: cmovael %ecx, %eax ; X64-NEXT: retq ; ; X86-LABEL: func: @@ -93,38 +90,35 @@ define i4 @func3(i4 %x, i4 %y) nounwind { ; X64-LABEL: func3: ; X64: # %bb.0: -; X64-NEXT: andl $15, %esi -; X64-NEXT: shlb $4, %dil -; X64-NEXT: movzbl %dil, %eax -; X64-NEXT: imull %esi, %eax -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: shrb $2, %cl -; X64-NEXT: shrl $8, %eax -; X64-NEXT: movl %eax, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $15, %sil +; X64-NEXT: shlb $4, %al +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: mulb %sil +; X64-NEXT: shrb $2, %al +; X64-NEXT: movb %ah, %dl ; X64-NEXT: shlb $6, %dl -; X64-NEXT: orb %cl, %dl -; X64-NEXT: movzbl %dl, %ecx -; X64-NEXT: cmpb $4, %al +; X64-NEXT: orb %al, %dl +; X64-NEXT: movzbl %dl, %edx +; X64-NEXT: cmpb $4, %ah ; X64-NEXT: movl $255, %eax -; X64-NEXT: cmovbl %ecx, %eax +; X64-NEXT: cmovbl %edx, %eax ; X64-NEXT: shrb $4, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; ; X86-LABEL: func3: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $15, %al ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl %al, %edx -; X86-NEXT: shlb $4, %cl -; X86-NEXT: movzbl %cl, %eax -; X86-NEXT: imull %edx, %eax +; X86-NEXT: andb $15, %cl +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shlb $4, %al +; X86-NEXT: mulb %cl +; X86-NEXT: shrb $2, %al ; X86-NEXT: movb %ah, %cl ; X86-NEXT: shlb $6, %cl -; X86-NEXT: shrb $2, %al -; X86-NEXT: orb %cl, %al -; X86-NEXT: movzbl %al, %ecx +; X86-NEXT: orb %al, %cl +; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: cmpb $4, %ah ; X86-NEXT: movl $255, %eax ; X86-NEXT: cmovbl %ecx, %eax @@ -138,50 +132,42 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-LABEL: vec: ; X64: # %bb.0: -; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X64-NEXT: movd %xmm2, %eax ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X64-NEXT: movd %xmm2, %eax +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] ; X64-NEXT: movd %xmm2, %ecx -; X64-NEXT: imulq %rax, %rcx -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: shrq $32, %rax -; X64-NEXT: shrdl $2, %eax, %ecx -; X64-NEXT: cmpl $4, %eax -; X64-NEXT: movl $-1, %eax -; X64-NEXT: cmovael %eax, %ecx -; X64-NEXT: movd %ecx, %xmm2 -; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] -; X64-NEXT: movd %xmm3, %ecx +; X64-NEXT: mull %ecx +; X64-NEXT: shrdl $2, %edx, %eax +; X64-NEXT: cmpl $4, %edx +; X64-NEXT: movl $-1, %ecx +; X64-NEXT: cmovael %ecx, %eax +; X64-NEXT: movd %eax, %xmm2 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X64-NEXT: movd %xmm3, %eax +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] ; X64-NEXT: movd %xmm3, %edx -; X64-NEXT: imulq %rcx, %rdx -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: shrq $32, %rcx -; X64-NEXT: shrdl $2, %ecx, %edx -; X64-NEXT: cmpl $4, %ecx -; X64-NEXT: cmovael %eax, %edx -; X64-NEXT: movd %edx, %xmm3 +; X64-NEXT: mull %edx +; X64-NEXT: shrdl $2, %edx, %eax +; X64-NEXT: cmpl $4, %edx +; X64-NEXT: cmovael %ecx, %eax +; X64-NEXT: movd %eax, %xmm3 ; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; X64-NEXT: movd %xmm1, %ecx -; X64-NEXT: movd %xmm0, %edx -; X64-NEXT: imulq %rcx, %rdx -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: shrq $32, %rcx -; X64-NEXT: shrdl $2, %ecx, %edx -; X64-NEXT: cmpl $4, %ecx -; X64-NEXT: cmovael %eax, %edx -; X64-NEXT: movd %edx, %xmm2 -; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1] -; 
X64-NEXT: movd %xmm1, %ecx +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: movd %xmm1, %edx +; X64-NEXT: mull %edx +; X64-NEXT: shrdl $2, %edx, %eax +; X64-NEXT: cmpl $4, %edx +; X64-NEXT: cmovael %ecx, %eax +; X64-NEXT: movd %eax, %xmm2 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] ; X64-NEXT: movd %xmm0, %edx -; X64-NEXT: imulq %rcx, %rdx -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: shrq $32, %rcx -; X64-NEXT: shrdl $2, %ecx, %edx -; X64-NEXT: cmpl $4, %ecx -; X64-NEXT: cmovael %eax, %edx -; X64-NEXT: movd %edx, %xmm0 +; X64-NEXT: mull %edx +; X64-NEXT: shrdl $2, %edx, %eax +; X64-NEXT: cmpl $4, %edx +; X64-NEXT: cmovael %ecx, %eax +; X64-NEXT: movd %eax, %xmm0 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; X64-NEXT: movdqa %xmm2, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll b/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll --- a/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll +++ b/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll @@ -43,56 +43,57 @@ define <4 x i16> @smulfixsat(<4 x i16> %a) { ; CHECK-LABEL: smulfixsat: ; CHECK: # %bb.0: -; CHECK-NEXT: pextrw $2, %xmm0, %eax -; CHECK-NEXT: cwtl -; CHECK-NEXT: leal (%rax,%rax,2), %ecx -; CHECK-NEXT: movl %ecx, %edx -; CHECK-NEXT: shrl $16, %edx -; CHECK-NEXT: shldw $1, %cx, %dx -; CHECK-NEXT: sarl $16, %ecx -; CHECK-NEXT: cmpl $16384, %ecx # imm = 0x4000 -; CHECK-NEXT: movl $32767, %eax # imm = 0x7FFF -; CHECK-NEXT: cmovgel %eax, %edx -; CHECK-NEXT: cmpl $-16384, %ecx # imm = 0xC000 -; CHECK-NEXT: movl $32768, %ecx # imm = 0x8000 -; CHECK-NEXT: cmovll %ecx, %edx -; CHECK-NEXT: pextrw $1, %xmm0, %esi -; CHECK-NEXT: leal (%rsi,%rsi), %edi -; CHECK-NEXT: movswl %si, %r8d -; CHECK-NEXT: movl %r8d, %esi -; CHECK-NEXT: shrl $16, %esi -; CHECK-NEXT: shldw $1, %di, %si -; CHECK-NEXT: sarl $16, %r8d -; CHECK-NEXT: cmpl $16384, %r8d # imm = 0x4000 -; CHECK-NEXT: cmovgel %eax, %esi -; CHECK-NEXT: cmpl $-16384, %r8d # imm = 0xC000 -; CHECK-NEXT: cmovll %ecx, %esi -; CHECK-NEXT: movd %xmm0, %edi -; CHECK-NEXT: movswl %di, %edi -; CHECK-NEXT: movl %edi, %r8d -; CHECK-NEXT: shrl $16, %r8d -; CHECK-NEXT: shldw $1, %di, %r8w -; CHECK-NEXT: sarl $16, %edi -; CHECK-NEXT: cmpl $16384, %edi # imm = 0x4000 -; CHECK-NEXT: cmovgel %eax, %r8d -; CHECK-NEXT: cmpl $-16384, %edi # imm = 0xC000 -; CHECK-NEXT: cmovll %ecx, %r8d -; CHECK-NEXT: movzwl %r8w, %edi -; CHECK-NEXT: movd %edi, %xmm1 -; CHECK-NEXT: pinsrw $1, %esi, %xmm1 -; CHECK-NEXT: pinsrw $2, %edx, %xmm1 -; CHECK-NEXT: pextrw $3, %xmm0, %edx +; CHECK-NEXT: pextrw $1, %xmm0, %eax +; CHECK-NEXT: movw $2, %cx +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: imulw %cx +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shrdw $15, %dx, %cx +; CHECK-NEXT: movswl %dx, %eax +; CHECK-NEXT: cmpl $16384, %eax # imm = 0x4000 +; CHECK-NEXT: movl $32767, %esi # imm = 0x7FFF +; CHECK-NEXT: cmovgel %esi, %ecx +; CHECK-NEXT: cmpl $-16384, %eax # imm = 0xC000 +; CHECK-NEXT: movl $32768, %edi # imm = 0x8000 +; CHECK-NEXT: cmovll %edi, %ecx +; CHECK-NEXT: movd %xmm0, %eax +; CHECK-NEXT: movw $1, %dx +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: imulw %dx +; CHECK-NEXT: # kill: def $ax killed $ax def $eax +; CHECK-NEXT: shrdw $15, %dx, %ax ; CHECK-NEXT: movswl %dx, %edx -; CHECK-NEXT: leal (,%rdx,4), %esi -; CHECK-NEXT: movl %esi, %edi -; CHECK-NEXT: shrl $16, %edi -; CHECK-NEXT: shldw $1, %si, %di -; CHECK-NEXT: 
sarl $14, %edx
 ; CHECK-NEXT: cmpl $16384, %edx # imm = 0x4000
-; CHECK-NEXT: cmovgel %eax, %edi
+; CHECK-NEXT: cmovgel %esi, %eax
 ; CHECK-NEXT: cmpl $-16384, %edx # imm = 0xC000
-; CHECK-NEXT: cmovll %ecx, %edi
-; CHECK-NEXT: pinsrw $3, %edi, %xmm1
+; CHECK-NEXT: cmovll %edi, %eax
+; CHECK-NEXT: movzwl %ax, %eax
+; CHECK-NEXT: movd %eax, %xmm1
+; CHECK-NEXT: pinsrw $1, %ecx, %xmm1
+; CHECK-NEXT: pextrw $2, %xmm0, %eax
+; CHECK-NEXT: movw $3, %cx
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: imulw %cx
+; CHECK-NEXT: # kill: def $ax killed $ax def $eax
+; CHECK-NEXT: shrdw $15, %dx, %ax
+; CHECK-NEXT: movswl %dx, %ecx
+; CHECK-NEXT: cmpl $16384, %ecx # imm = 0x4000
+; CHECK-NEXT: cmovgel %esi, %eax
+; CHECK-NEXT: cmpl $-16384, %ecx # imm = 0xC000
+; CHECK-NEXT: cmovll %edi, %eax
+; CHECK-NEXT: pinsrw $2, %eax, %xmm1
+; CHECK-NEXT: pextrw $3, %xmm0, %eax
+; CHECK-NEXT: movw $4, %cx
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: imulw %cx
+; CHECK-NEXT: # kill: def $ax killed $ax def $eax
+; CHECK-NEXT: shrdw $15, %dx, %ax
+; CHECK-NEXT: movswl %dx, %ecx
+; CHECK-NEXT: cmpl $16384, %ecx # imm = 0x4000
+; CHECK-NEXT: cmovgel %esi, %eax
+; CHECK-NEXT: cmpl $-16384, %ecx # imm = 0xC000
+; CHECK-NEXT: cmovll %edi, %eax
+; CHECK-NEXT: pinsrw $3, %eax, %xmm1
 ; CHECK-NEXT: movdqa %xmm1, %xmm0
 ; CHECK-NEXT: retq
 %t = call <4 x i16> @llvm.smul.fix.sat.v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>, <4 x i16> %a, i32 15)
@@ -104,41 +105,44 @@
 ; CHECK-LABEL: umulfixsat:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: pextrw $2, %xmm0, %eax
-; CHECK-NEXT: leal (%rax,%rax,2), %eax
-; CHECK-NEXT: movl %eax, %edx
-; CHECK-NEXT: shrl $16, %edx
-; CHECK-NEXT: movl %edx, %ecx
-; CHECK-NEXT: shldw $1, %ax, %cx
+; CHECK-NEXT: movw $3, %cx
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: mulw %cx
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shrdw $15, %dx, %cx
+; CHECK-NEXT: movzwl %dx, %eax
+; CHECK-NEXT: cmpl $32768, %eax # imm = 0x8000
+; CHECK-NEXT: movl $65535, %esi # imm = 0xFFFF
+; CHECK-NEXT: cmovael %esi, %ecx
+; CHECK-NEXT: pextrw $1, %xmm0, %eax
+; CHECK-NEXT: movw $2, %dx
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: mulw %dx
+; CHECK-NEXT: # kill: def $ax killed $ax def $eax
+; CHECK-NEXT: shrdw $15, %dx, %ax
+; CHECK-NEXT: movzwl %dx, %edx
 ; CHECK-NEXT: cmpl $32768, %edx # imm = 0x8000
-; CHECK-NEXT: movl $65535, %eax # imm = 0xFFFF
-; CHECK-NEXT: cmovael %eax, %ecx
-; CHECK-NEXT: pextrw $1, %xmm0, %edx
-; CHECK-NEXT: addl %edx, %edx
-; CHECK-NEXT: movl %edx, %esi
-; CHECK-NEXT: shrl $16, %esi
-; CHECK-NEXT: movl %esi, %edi
-; CHECK-NEXT: shldw $1, %dx, %di
-; CHECK-NEXT: cmpl $32768, %esi # imm = 0x8000
-; CHECK-NEXT: cmovael %eax, %edi
+; CHECK-NEXT: cmovael %esi, %eax
 ; CHECK-NEXT: movd %xmm0, %edx
-; CHECK-NEXT: xorl %esi, %esi
-; CHECK-NEXT: shldw $1, %dx, %si
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: shldw $1, %dx, %di
 ; CHECK-NEXT: movl $32768, %edx # imm = 0x8000
 ; CHECK-NEXT: negl %edx
-; CHECK-NEXT: cmovael %eax, %esi
-; CHECK-NEXT: movzwl %si, %edx
+; CHECK-NEXT: cmovael %esi, %edi
+; CHECK-NEXT: movzwl %di, %edx
 ; CHECK-NEXT: movd %edx, %xmm1
-; CHECK-NEXT: pinsrw $1, %edi, %xmm1
+; CHECK-NEXT: pinsrw $1, %eax, %xmm1
 ; CHECK-NEXT: pinsrw $2, %ecx, %xmm1
-; CHECK-NEXT: pextrw $3, %xmm0, %ecx
-; CHECK-NEXT: shll $2, %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: shrl $16, %edx
-; CHECK-NEXT: movl %edx, %esi
-; CHECK-NEXT: shldw $1, %cx, %si
-; CHECK-NEXT: cmpl $32768, %edx # imm = 0x8000
-; CHECK-NEXT: cmovael %eax, %esi
-; CHECK-NEXT: pinsrw $3, %esi, %xmm1
+; CHECK-NEXT: pextrw $3, %xmm0, %eax
+; CHECK-NEXT: movw $4, %cx
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: mulw %cx
+; CHECK-NEXT: # kill: def $ax killed $ax def $eax
+; CHECK-NEXT: shrdw $15, %dx, %ax
+; CHECK-NEXT: movzwl %dx, %ecx
+; CHECK-NEXT: cmpl $32768, %ecx # imm = 0x8000
+; CHECK-NEXT: cmovael %esi, %eax
+; CHECK-NEXT: pinsrw $3, %eax, %xmm1
 ; CHECK-NEXT: movdqa %xmm1, %xmm0
 ; CHECK-NEXT: retq
 %t = call <4 x i16> @llvm.umul.fix.sat.v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>, <4 x i16> %a, i32 15)