Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2157,10 +2157,10 @@
   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
   if (N0.getOpcode() == ISD::SHL &&
-      isConstantOrConstantVector(N1) &&
-      isConstantOrConstantVector(N0.getOperand(1))) {
+      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
-    AddToWorklist(C3.getNode());
+    assert(isConstantOrConstantVector(C3));
     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
   }
@@ -4714,7 +4714,7 @@
       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
-    AddToWorklist(Shl.getNode());
+    assert(isConstantOrConstantVector(Shl));
     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
   }
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3469,6 +3469,11 @@
   EVT SVT = VT.getScalarType();
   SmallVector<SDValue, 4> Outputs;
   for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
+    // If one of the input elements is undef, so is the output.
+    if (BV1->getOperand(I)->isUndef() || BV2->getOperand(I)->isUndef()) {
+      Outputs.push_back(getUNDEF(SVT));
+      continue;
+    }
     ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
     ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I));
     if (!V1 || !V2) // Not a constant, bail.
Index: test/CodeGen/AArch64/dag-combine-mul-shl.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/dag-combine-mul-shl.ll
@@ -0,0 +1,117 @@
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+; CHECK-LABEL: fn1_vector:
+; CHECK: adrp x[[BASE:[0-9]+]], .LCP
+; CHECK-NEXT: ldr q[[NUM:[0-9]+]], [x[[BASE]],
+; CHECK-NEXT: mul v0.16b, v0.16b, v[[NUM]].16b
+; CHECK-NEXT: ret
+define <16 x i8> @fn1_vector(<16 x i8> %arg) {
+entry:
+  %shl = shl <16 x i8> %arg,
+  %mul = mul <16 x i8> %shl,
+  ret <16 x i8> %mul
+}
+
+; CHECK-LABEL: fn2_vector:
+; CHECK: adrp x[[BASE:[0-9]+]], .LCP
+; CHECK-NEXT: ldr q[[NUM:[0-9]+]], [x[[BASE]],
+; CHECK-NEXT: mul v0.16b, v0.16b, v[[NUM]].16b
+; CHECK-NEXT: ret
+define <16 x i8> @fn2_vector(<16 x i8> %arg) {
+entry:
+  %mul = mul <16 x i8> %arg,
+  %shl = shl <16 x i8> %mul,
+  ret <16 x i8> %shl
+}
+
+; CHECK-LABEL: fn1_vector_undef:
+; CHECK: adrp x[[BASE:[0-9]+]], .LCP
+; CHECK-NEXT: ldr q[[NUM:[0-9]+]], [x[[BASE]],
+; CHECK-NEXT: mul v0.16b, v0.16b, v[[NUM]].16b
+; CHECK-NEXT: ret
+define <16 x i8> @fn1_vector_undef(<16 x i8> %arg) {
+entry:
+  %shl = shl <16 x i8> %arg,
+  %mul = mul <16 x i8> %shl,
+  ret <16 x i8> %mul
+}
+
+; CHECK-LABEL: fn2_vector_undef:
+; CHECK: adrp x[[BASE:[0-9]+]], .LCP
+; CHECK-NEXT: ldr q[[NUM:[0-9]+]], [x[[BASE]],
+; CHECK-NEXT: mul v0.16b, v0.16b, v[[NUM]].16b
+; CHECK-NEXT: ret
+define <16 x i8> @fn2_vector_undef(<16 x i8> %arg) {
+entry:
+  %mul = mul <16 x i8> %arg,
+  %shl = shl <16 x i8> %mul,
+  ret <16 x i8> %shl
+}
+
+; CHECK-LABEL: fn1_scalar:
+; CHECK: mov w[[REG:[0-9]+]], #1664
+; CHECK-NEXT: mul w0, w0, w[[REG]]
+; CHECK-NEXT: ret
+define i32 @fn1_scalar(i32 %arg) {
+entry:
+  %shl = shl i32 %arg, 7
+  %mul = mul i32 %shl, 13
+  ret i32 %mul
+}
+
+; CHECK-LABEL: fn2_scalar:
+; CHECK: mov w[[REG:[0-9]+]], #1664
+; CHECK-NEXT: mul w0, w0, w[[REG]]
+; CHECK-NEXT: ret
+define i32 @fn2_scalar(i32 %arg) {
+entry:
+  %mul = mul i32 %arg, 13
+  %shl = shl i32 %mul, 7
+  ret i32 %shl
+}
+
+; CHECK-LABEL: fn1_scalar_undef:
+; CHECK: mov w0
+; CHECK-NEXT: ret
+define i32 @fn1_scalar_undef(i32 %arg) {
+entry:
+  %shl = shl i32 %arg, 7
+  %mul = mul i32 %shl, undef
+  ret i32 %mul
+}
+
+; CHECK-LABEL: fn2_scalar_undef:
+; CHECK: mov w0
+; CHECK-NEXT: ret
+define i32 @fn2_scalar_undef(i32 %arg) {
+entry:
+  %mul = mul i32 %arg, undef
+  %shl = shl i32 %mul, 7
+  ret i32 %shl
+}
+
+; CHECK-LABEL: fn1_scalar_opaque:
+; CHECK: mov w[[REG:[0-9]+]], #13
+; CHECK-NEXT: mul w[[REG]], w0, w[[REG]]
+; CHECK-NEXT: lsl w0, w[[REG]], #7
+; CHECK-NEXT: ret
+define i32 @fn1_scalar_opaque(i32 %arg) {
+entry:
+  %bitcast = bitcast i32 13 to i32
+  %shl = shl i32 %arg, 7
+  %mul = mul i32 %shl, %bitcast
+  ret i32 %mul
+}
+
+; CHECK-LABEL: fn2_scalar_opaque:
+; CHECK: mov w[[REG:[0-9]+]], #13
+; CHECK-NEXT: mul w[[REG]], w0, w[[REG]]
+; CHECK-NEXT: lsl w0, w[[REG]], #7
+; CHECK-NEXT: ret
+define i32 @fn2_scalar_opaque(i32 %arg) {
+entry:
+  %bitcast = bitcast i32 13 to i32
+  %mul = mul i32 %arg, %bitcast
+  %shl = shl i32 %mul, 7
+  ret i32 %shl
+}
Index: test/CodeGen/X86/legalize-shl-vec.ll
===================================================================
--- test/CodeGen/X86/legalize-shl-vec.ll
+++ test/CodeGen/X86/legalize-shl-vec.ll
@@ -6,14 +6,6 @@
 ; X32-LABEL: test_shl:
 ; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl $0, 60(%eax)
-; X32-NEXT: movl $0, 56(%eax)
-; X32-NEXT: movl $0, 52(%eax)
-; X32-NEXT: movl $0, 48(%eax)
-; X32-NEXT: movl $0, 44(%eax)
-; X32-NEXT: movl $0, 40(%eax)
-; X32-NEXT: movl $0, 36(%eax)
-; X32-NEXT: movl $0, 32(%eax)
 ; X32-NEXT: movl $0, 28(%eax)
 ; X32-NEXT: movl $0, 24(%eax)
 ; X32-NEXT: movl $0, 20(%eax)
@@ -26,10 +18,6 @@
 ;
 ; X64-LABEL: test_shl:
 ; X64: # BB#0:
-; X64-NEXT: movq $0, 56(%rdi)
-; X64-NEXT: movq $0, 48(%rdi)
-; X64-NEXT: movq $0, 40(%rdi)
-; X64-NEXT: movq $0, 32(%rdi)
 ; X64-NEXT: movq $0, 24(%rdi)
 ; X64-NEXT: movq $0, 16(%rdi)
 ; X64-NEXT: movq $0, 8(%rdi)
@@ -45,14 +33,6 @@
 ; X32-LABEL: test_srl:
 ; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl $0, 60(%eax)
-; X32-NEXT: movl $0, 56(%eax)
-; X32-NEXT: movl $0, 52(%eax)
-; X32-NEXT: movl $0, 48(%eax)
-; X32-NEXT: movl $0, 44(%eax)
-; X32-NEXT: movl $0, 40(%eax)
-; X32-NEXT: movl $0, 36(%eax)
-; X32-NEXT: movl $0, 32(%eax)
 ; X32-NEXT: movl $0, 28(%eax)
 ; X32-NEXT: movl $0, 24(%eax)
 ; X32-NEXT: movl $0, 20(%eax)
@@ -65,10 +45,6 @@
 ;
 ; X64-LABEL: test_srl:
 ; X64: # BB#0:
-; X64-NEXT: movq $0, 56(%rdi)
-; X64-NEXT: movq $0, 48(%rdi)
-; X64-NEXT: movq $0, 40(%rdi)
-; X64-NEXT: movq $0, 32(%rdi)
 ; X64-NEXT: movq $0, 24(%rdi)
 ; X64-NEXT: movq $0, 16(%rdi)
 ; X64-NEXT: movq $0, 8(%rdi)
@@ -85,22 +61,6 @@
 ; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, 60(%eax)
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, 56(%eax)
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, 52(%eax)
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, 48(%eax)
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, 44(%eax)
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, 40(%eax)
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, 36(%eax)
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, 32(%eax)
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: sarl $31, %ecx
 ; X32-NEXT: movl %ecx, 28(%eax)
 ; X32-NEXT: movl %ecx, 24(%eax)
@@ -114,14 +74,7 @@
 ;
 ; X64-LABEL: test_sra:
 ; X64: # BB#0:
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx
 ; X64-NEXT: sarq $63, %r8
-; X64-NEXT: movq %rdx, 56(%rdi)
-; X64-NEXT: movq %rcx, 48(%rdi)
-; X64-NEXT: movq %rax, 40(%rdi)
-; X64-NEXT: movq %r9, 32(%rdi)
 ; X64-NEXT: movq %r8, 24(%rdi)
 ; X64-NEXT: movq %r8, 16(%rdi)
 ; X64-NEXT: movq %r8, 8(%rdi)
Index: test/CodeGen/X86/merge-consecutive-loads-128.ll
===================================================================
--- test/CodeGen/X86/merge-consecutive-loads-128.ll
+++ test/CodeGen/X86/merge-consecutive-loads-128.ll
@@ -962,9 +962,6 @@
 ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-SSE1-NEXT: movl (%ecx), %ecx
 ; X32-SSE1-NEXT: movl %ecx, (%eax)
-; X32-SSE1-NEXT: movl $0, 12(%eax)
-; X32-SSE1-NEXT: movl $0, 8(%eax)
-; X32-SSE1-NEXT: movl $0, 4(%eax)
 ; X32-SSE1-NEXT: retl
 ;
 ; X32-SSE41-LABEL: merge_4i32_i32_combine:
Index: test/CodeGen/X86/shift-pcmp.ll
===================================================================
--- test/CodeGen/X86/shift-pcmp.ll
+++ test/CodeGen/X86/shift-pcmp.ll
@@ -26,15 +26,13 @@
 ; SSE-LABEL: bar:
 ; SSE: # BB#0:
 ; SSE-NEXT: pcmpeqw %xmm1, %xmm0
-; SSE-NEXT: psrlw $15, %xmm0
-; SSE-NEXT: psllw $5, %xmm0
+; SSE-NEXT: pand {{.*}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: bar:
 ; AVX: # BB#0:
 ; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $15, %xmm0, %xmm0
-; AVX-NEXT: vpsllw $5, %xmm0, %xmm0
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
 ;
   %icmp = icmp eq <8 x i16> %a, %b
Index: test/CodeGen/X86/widen_load-2.ll
===================================================================
--- test/CodeGen/X86/widen_load-2.ll
+++ test/CodeGen/X86/widen_load-2.ll
@@ -384,12 +384,10 @@
 ; X86-NEXT: pextrw $0, %xmm0, (%ecx)
 ; X86-NEXT: movb $1, 2(%ecx)
 ; X86-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; X86-NEXT: movdqa %xmm0, %xmm1
-; X86-NEXT: psrld $1, %xmm1
-; X86-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm0[6,7]
-; X86-NEXT: pextrb $8, %xmm1, 2(%eax)
-; X86-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; X86-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X86-NEXT: psrld $1, %xmm0
+; X86-NEXT: pextrb $8, %xmm0, 2(%eax)
+; X86-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; X86-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; X86-NEXT: pextrw $0, %xmm0, (%eax)
 ; X86-NEXT: addl $16, %esp
 ; X86-NEXT: retl $4
@@ -408,12 +406,10 @@
 ; X64-NEXT: pextrw $0, %xmm0, (%rdx)
 ; X64-NEXT: movb $1, 2(%rdx)
 ; X64-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; X64-NEXT: movdqa %xmm0, %xmm1
-; X64-NEXT: psrld $1, %xmm1
-; X64-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm0[6,7]
-; X64-NEXT: pextrb $8, %xmm1, 2(%rdi)
-; X64-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; X64-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X64-NEXT: psrld $1, %xmm0
+; X64-NEXT: pextrb $8, %xmm0, 2(%rdi)
+; X64-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; X64-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; X64-NEXT: pextrw $0, %xmm0, (%rdi)
 ; X64-NEXT: movq %rdi, %rax
 ; X64-NEXT: retq