diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -380,6 +380,7 @@
     SDValue visitSUBE(SDNode *N);
     SDValue visitSUBCARRY(SDNode *N);
     SDValue visitMUL(SDNode *N);
+    SDValue visitMULFIX(SDNode *N);
     SDValue useDivRem(SDNode *N);
     SDValue visitSDIV(SDNode *N);
     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
@@ -1697,6 +1698,9 @@
   case ISD::ADDCARRY:           return visitADDCARRY(N);
   case ISD::SUBE:               return visitSUBE(N);
   case ISD::SUBCARRY:           return visitSUBCARRY(N);
+  case ISD::SMULFIX:
+  case ISD::SMULFIXSAT:
+  case ISD::UMULFIX:            return visitMULFIX(N);
   case ISD::MUL:                return visitMUL(N);
   case ISD::SDIV:               return visitSDIV(N);
   case ISD::UDIV:               return visitUDIV(N);
@@ -3466,6 +3470,29 @@
   return SDValue();
 }
 
+// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT and UMULFIX here.
+SDValue DAGCombiner::visitMULFIX(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue Scale = N->getOperand(2);
+  EVT VT = N0.getValueType();
+
+  // fold (mulfix x, undef, scale) -> 0
+  if (N0.isUndef() || N1.isUndef())
+    return DAG.getConstant(0, SDLoc(N), VT);
+
+  // Canonicalize constant to RHS (the constant vector does not have to be a
+  // splat).
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
+
+  // fold (mulfix x, 0, scale) -> 0
+  if (isNullConstant(N1))
+    return DAG.getConstant(0, SDLoc(N), VT);
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitMUL(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
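
Note: the three folds above follow directly from the semantics of the
fixed-point multiply nodes: smul.fix/umul.fix compute the double-width product
of the first two operands and shift it right by the scale (the SAT variant
additionally clamps the result to the type's range), so a zero operand always
yields zero, and an undef operand may legally be refined to zero. A minimal IR
sketch of the effect (hypothetical input, not one of the regression tests
below):

  ; Without the combine, llc must expand the fixed-point multiply; with it,
  ; the node folds to the constant 0 during DAG combining, and x86-64 codegen
  ; collapses to "xorl %eax, %eax; retq".
  %r = call i32 @llvm.smul.fix.i32(i32 0, i32 %x, i32 2)
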
diff --git a/llvm/test/CodeGen/X86/mulfix_combine.ll b/llvm/test/CodeGen/X86/mulfix_combine.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/mulfix_combine.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-linux -o - | FileCheck %s
+
+declare i32 @llvm.smul.fix.i32(i32, i32, i32 immarg)
+declare i32 @llvm.umul.fix.i32(i32, i32, i32 immarg)
+declare i32 @llvm.smul.fix.sat.i32(i32, i32, i32 immarg)
+
+declare <4 x i32> @llvm.smul.fix.v4i32(<4 x i32>, <4 x i32>, i32 immarg)
+declare <4 x i32> @llvm.umul.fix.v4i32(<4 x i32>, <4 x i32>, i32 immarg)
+declare <4 x i32> @llvm.smul.fix.sat.v4i32(<4 x i32>, <4 x i32>, i32 immarg)
+
+define i32 @smulfix_undef(i32 %y) nounwind {
+; CHECK-LABEL: smulfix_undef:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    retq
+  %tmp = call i32 @llvm.smul.fix.i32(i32 undef, i32 %y, i32 2)
+  ret i32 %tmp
+}
+
+define i32 @smulfix_zero(i32 %y) nounwind {
+; CHECK-LABEL: smulfix_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    retq
+  %tmp = call i32 @llvm.smul.fix.i32(i32 0, i32 %y, i32 2)
+  ret i32 %tmp
+}
+
+define i32 @umulfix_undef(i32 %y) nounwind {
+; CHECK-LABEL: umulfix_undef:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    retq
+  %tmp = call i32 @llvm.umul.fix.i32(i32 undef, i32 %y, i32 2)
+  ret i32 %tmp
+}
+
+define i32 @umulfix_zero(i32 %y) nounwind {
+; CHECK-LABEL: umulfix_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    retq
+  %tmp = call i32 @llvm.umul.fix.i32(i32 0, i32 %y, i32 2)
+  ret i32 %tmp
+}
+
+define i32 @smulfixsat_undef(i32 %y) nounwind {
+; CHECK-LABEL: smulfixsat_undef:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    retq
+  %tmp = call i32 @llvm.smul.fix.sat.i32(i32 undef, i32 %y, i32 2)
+  ret i32 %tmp
+}
+
+define i32 @smulfixsat_zero(i32 %y) nounwind {
+; CHECK-LABEL: smulfixsat_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    retq
+  %tmp = call i32 @llvm.smul.fix.sat.i32(i32 0, i32 %y, i32 2)
+  ret i32 %tmp
+}
+
+define <4 x i32> @vec_smulfix_undef(<4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_smulfix_undef:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %tmp = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> %y, i32 2)
+  ret <4 x i32> %tmp
+}
+
+define <4 x i32> @vec_smulfix_zero(<4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_smulfix_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %tmp = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> %y, i32 2)
+  ret <4 x i32> %tmp
+}
+
+define <4 x i32> @vec_umulfix_undef(<4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_umulfix_undef:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %tmp = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> %y, i32 2)
+  ret <4 x i32> %tmp
+}
+
+define <4 x i32> @vec_umulfix_zero(<4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_umulfix_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %tmp = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> %y, i32 2)
+  ret <4 x i32> %tmp
+}
+
+define <4 x i32> @vec_smulfixsat_undef(<4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_smulfixsat_undef:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %tmp = call <4 x i32> @llvm.smul.fix.sat.v4i32(<4 x i32> undef, <4 x i32> %y, i32 2)
+  ret <4 x i32> %tmp
+}
+
+define <4 x i32> @vec_smulfixsat_zero(<4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_smulfixsat_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %tmp = call <4 x i32> @llvm.smul.fix.sat.v4i32(<4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> %y, i32 2)
+  ret <4 x i32> %tmp
+}
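
Note: the churn in the pre-existing legalization test below is fallout from
the new canonicalization: the constant vector operand now ends up as the RHS
of the SMULFIXSAT node, which changes the order in which the elements are
scalarized and which registers they land in, but not the values computed. For
reference, with scale 15 an i16 smul.fix.sat treats its operands as Q1.15
fixed-point numbers, and the generated compares against 16383 (0x3FFF) and
-16384 (0xC000) check whether (product >> 15) still fits in i16 before the
saturating cmov selects. A worked example of the saturating case (illustrative
only, not part of the tests):

  ; -1.0 * -1.0 in Q1.15 would be +1.0, which is not representable:
  ; (-32768 * -32768) >> 15 = 32768, so the result clamps to 32767 (0x7FFF).
  %r = call i16 @llvm.smul.fix.sat.i16(i16 -32768, i16 -32768, i32 15)
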
diff --git a/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll b/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll
--- a/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll
+++ b/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll
@@ -42,71 +42,57 @@
 define <4 x i16> @smulfixsat(<4 x i16> %a) {
 ; CHECK-LABEL: smulfixsat:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movdqa %xmm0, %xmm1
-; CHECK-NEXT:    pextrw $1, %xmm0, %eax
+; CHECK-NEXT:    pextrw $2, %xmm0, %eax
 ; CHECK-NEXT:    cwtl
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    shrl $15, %ecx
-; CHECK-NEXT:    leal (%rax,%rax), %edx
-; CHECK-NEXT:    shrdw $15, %cx, %dx
-; CHECK-NEXT:    sarl $15, %eax
+; CHECK-NEXT:    leal (%rax,%rax,2), %ecx
+; CHECK-NEXT:    movl %ecx, %edx
+; CHECK-NEXT:    shrl $16, %edx
+; CHECK-NEXT:    shldw $1, %cx, %dx
+; CHECK-NEXT:    sarl $16, %ecx
+; CHECK-NEXT:    cmpl $16383, %ecx # imm = 0x3FFF
+; CHECK-NEXT:    movl $32767, %r8d # imm = 0x7FFF
+; CHECK-NEXT:    cmovgl %r8d, %edx
+; CHECK-NEXT:    cmpl $-16384, %ecx # imm = 0xC000
+; CHECK-NEXT:    movl $32768, %ecx # imm = 0x8000
+; CHECK-NEXT:    cmovll %ecx, %edx
+; CHECK-NEXT:    pextrw $1, %xmm0, %esi
+; CHECK-NEXT:    movswl %si, %edi
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shrl $15, %eax
+; CHECK-NEXT:    leal (%rdi,%rdi), %esi
+; CHECK-NEXT:    shrdw $15, %ax, %si
+; CHECK-NEXT:    sarl $15, %edi
+; CHECK-NEXT:    cmpl $16383, %edi # imm = 0x3FFF
+; CHECK-NEXT:    cmovgl %r8d, %esi
+; CHECK-NEXT:    cmpl $-16384, %edi # imm = 0xC000
+; CHECK-NEXT:    cmovll %ecx, %esi
+; CHECK-NEXT:    movd %xmm0, %eax
+; CHECK-NEXT:    cwtl
+; CHECK-NEXT:    movl %eax, %edi
+; CHECK-NEXT:    shrl $16, %edi
+; CHECK-NEXT:    shldw $1, %ax, %di
+; CHECK-NEXT:    sarl $16, %eax
+; CHECK-NEXT:    cmpl $16383, %eax # imm = 0x3FFF
+; CHECK-NEXT:    cmovgl %r8d, %edi
+; CHECK-NEXT:    cmpl $-16384, %eax # imm = 0xC000
+; CHECK-NEXT:    cmovll %ecx, %edi
+; CHECK-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NEXT:    pinsrw $0, %edi, %xmm1
+; CHECK-NEXT:    pinsrw $1, %esi, %xmm1
+; CHECK-NEXT:    pinsrw $2, %edx, %xmm1
+; CHECK-NEXT:    pextrw $3, %xmm0, %eax
+; CHECK-NEXT:    cwtl
+; CHECK-NEXT:    movl %eax, %edx
+; CHECK-NEXT:    shrl $14, %edx
+; CHECK-NEXT:    leal (,%rax,4), %esi
+; CHECK-NEXT:    shrdw $15, %dx, %si
+; CHECK-NEXT:    sarl $14, %eax
 ; CHECK-NEXT:    cmpl $16383, %eax # imm = 0x3FFF
-; CHECK-NEXT:    movl $32767, %ecx # imm = 0x7FFF
-; CHECK-NEXT:    cmovgl %ecx, %edx
+; CHECK-NEXT:    cmovgl %r8d, %esi
 ; CHECK-NEXT:    cmpl $-16384, %eax # imm = 0xC000
-; CHECK-NEXT:    movl $32768, %eax # imm = 0x8000
-; CHECK-NEXT:    cmovll %eax, %edx
-; CHECK-NEXT:    movd %edx, %xmm2
-; CHECK-NEXT:    movd %xmm0, %edx
-; CHECK-NEXT:    movswl %dx, %edx
-; CHECK-NEXT:    movl %edx, %esi
-; CHECK-NEXT:    shrl $16, %esi
-; CHECK-NEXT:    shldw $1, %dx, %si
-; CHECK-NEXT:    sarl $16, %edx
-; CHECK-NEXT:    cmpl $16383, %edx # imm = 0x3FFF
-; CHECK-NEXT:    cmovgl %ecx, %esi
-; CHECK-NEXT:    cmpl $-16384, %edx # imm = 0xC000
-; CHECK-NEXT:    cmovll %eax, %esi
-; CHECK-NEXT:    movd %esi, %xmm0
-; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; CHECK-NEXT:    pextrw $2, %xmm1, %edx
-; CHECK-NEXT:    movswl %dx, %edx
-; CHECK-NEXT:    leal (%rdx,%rdx,2), %edx
-; CHECK-NEXT:    movl %edx, %esi
-; CHECK-NEXT:    shrl $16, %esi
-; CHECK-NEXT:    shldw $1, %dx, %si
-; CHECK-NEXT:    sarl $16, %edx
-; CHECK-NEXT:    cmpl $16383, %edx # imm = 0x3FFF
-; CHECK-NEXT:    cmovgl %ecx, %esi
-; CHECK-NEXT:    cmpl $-16384, %edx # imm = 0xC000
-; CHECK-NEXT:    cmovll %eax, %esi
-; CHECK-NEXT:    movd %esi, %xmm2
-; CHECK-NEXT:    pextrw $3, %xmm1, %edx
-; CHECK-NEXT:    movswl %dx, %edx
-; CHECK-NEXT:    movl %edx, %esi
-; CHECK-NEXT:    shrl $14, %esi
-; CHECK-NEXT:    leal (,%rdx,4), %edi
-; CHECK-NEXT:    shrdw $15, %si, %di
-; CHECK-NEXT:    sarl $14, %edx
-; CHECK-NEXT:    cmpl $16383, %edx # imm = 0x3FFF
-; CHECK-NEXT:    cmovgl %ecx, %edi
-; CHECK-NEXT:    cmpl $-16384, %edx # imm = 0xC000
-; CHECK-NEXT:    cmovll %eax, %edi
-; CHECK-NEXT:    movd %edi, %xmm1
-; CHECK-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    shrdw $15, %dx, %dx
-; CHECK-NEXT:    movl $16383, %esi # imm = 0x3FFF
-; CHECK-NEXT:    negl %esi
-; CHECK-NEXT:    cmovgl %ecx, %edx
-; CHECK-NEXT:    movl $-16384, %ecx # imm = 0xC000
-; CHECK-NEXT:    negl %ecx
-; CHECK-NEXT:    cmovll %eax, %edx
-; CHECK-NEXT:    movd %edx, %xmm1
-; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
-; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    cmovll %ecx, %esi
+; CHECK-NEXT:    pinsrw $3, %esi, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %t = call <4 x i16> @llvm.smul.fix.sat.v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>, <4 x i16> %a, i32 15)
   ret <4 x i16> %t
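
Note: the CHECK lines in both test files are in the update_llc_test_checks.py
format (see the NOTE header in mulfix_combine.ll), so after changing the
combine they can be regenerated rather than edited by hand. A typical
invocation, assuming an in-tree build directory named "build":

  $ llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc \
      llvm/test/CodeGen/X86/mulfix_combine.ll \
      llvm/test/CodeGen/X86/vector-mulfix-legalize.ll
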