Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7200,8 +7200,40 @@
     }
   }

-  // fold (srl (shl x, c), c) -> (and x, cst2)
-  // TODO - (srl (shl x, c1), c2).
+  // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
+  //                               (and (srl x, (sub c2, c1)), MASK)
+  // Only fold this if the inner shift has no other uses -- if it does, folding
+  // this will increase the total number of instructions.
+  // TODO - drop hasOneUse requirement if c1 == c2?
+  // TODO - support non-uniform vector shift amounts.
+  if (N1C && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+      TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+      if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
+        uint64_t c1 = N0C1->getZExtValue();
+        uint64_t c2 = N1C->getZExtValue();
+        APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - c1);
+        SDValue Shift;
+        if (c2 > c1) {
+          Mask.lshrInPlace(c2 - c1);
+          SDLoc DL(N);
+          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
+        } else {
+          Mask <<= c1 - c2;
+          SDLoc DL(N);
+          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
+        }
+        SDLoc DL(N0);
+        AddToWorklist(Shift.getNode());
+        return DAG.getNode(ISD::AND, DL, VT, Shift,
+                           DAG.getConstant(Mask, DL, VT));
+      }
+    }
+  }
+
+  // fold (srl (shl x, c1), c1) -> (and x, (srl -1, c1))
   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
     SDLoc DL(N);
Index: test/CodeGen/AArch64/arm64-bitfield-extract.ll
===================================================================
--- test/CodeGen/AArch64/arm64-bitfield-extract.ll
+++ test/CodeGen/AArch64/arm64-bitfield-extract.ll
@@ -393,8 +393,9 @@
 ; LLC-LABEL: fct12:
 ; LLC:       // %bb.0: // %entry
 ; LLC-NEXT:    ldr w8, [x0]
+; LLC-NEXT:    and w8, w8, #0x3ffffff8
 ; LLC-NEXT:    bfxil w8, w1, #16, #3
-; LLC-NEXT:    ubfx w8, w8, #2, #28
+; LLC-NEXT:    lsr w8, w8, #2
 ; LLC-NEXT:    str w8, [x0]
 ; LLC-NEXT:    ret
 ; OPT-LABEL: @fct12(
@@ -429,8 +430,9 @@
 ; LLC-LABEL: fct13:
 ; LLC:       // %bb.0: // %entry
 ; LLC-NEXT:    ldr x8, [x0]
+; LLC-NEXT:    and x8, x8, #0x3ffffffffffffff8
 ; LLC-NEXT:    bfxil x8, x1, #16, #3
-; LLC-NEXT:    ubfx x8, x8, #2, #60
+; LLC-NEXT:    lsr x8, x8, #2
 ; LLC-NEXT:    str x8, [x0]
 ; LLC-NEXT:    ret
 ; OPT-LABEL: @fct13(
@@ -562,7 +564,7 @@
 ; LLC-NEXT:    movk w9, #26, lsl #16
 ; LLC-NEXT:    and w8, w8, w9
 ; LLC-NEXT:    bfxil w8, w1, #16, #3
-; LLC-NEXT:    ubfx w8, w8, #2, #28
+; LLC-NEXT:    lsr w8, w8, #2
 ; LLC-NEXT:    str w8, [x0]
 ; LLC-NEXT:    ret
 ; OPT-LABEL: @fct16(
@@ -604,7 +606,7 @@
 ; LLC-NEXT:    movk w9, #26, lsl #16
 ; LLC-NEXT:    and x8, x8, x9
 ; LLC-NEXT:    bfxil x8, x1, #16, #3
-; LLC-NEXT:    ubfx x8, x8, #2, #60
+; LLC-NEXT:    lsr x8, x8, #2
 ; LLC-NEXT:    str x8, [x0]
 ; LLC-NEXT:    ret
 ; OPT-LABEL: @fct17(
Index: test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll
@@ -683,8 +683,8 @@
 ; SI-NEXT:    s_mov_b32 s4, s0
 ; SI-NEXT:    s_mov_b32 s5, s1
 ; SI-NEXT:    s_waitcnt vmcnt(0)
-; SI-NEXT:    v_lshlrev_b32_e32 v0, 31, v0
-; SI-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 30, v0
+; SI-NEXT:    v_and_b32_e32 v0, 2.0, v0
 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
 ;
@@ -702,8 +702,8 @@
 ; VI-NEXT:    s_mov_b32 s7, s3
 ; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 31, v0
-; VI-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 30, v0
+; VI-NEXT:    v_and_b32_e32 v0, 2.0, v0
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
   %x = load i32, i32 addrspace(1)* %in, align 4
Index: test/CodeGen/X86/pr32588.ll
===================================================================
--- test/CodeGen/X86/pr32588.ll
+++ test/CodeGen/X86/pr32588.ll
@@ -8,9 +8,9 @@
 define void @fn1() {
 ; CHECK-LABEL: fn1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpl $1, {{.*}}(%rip)
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl $0, {{.*}}(%rip)
+; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    movl %eax, {{.*}}(%rip)
 ; CHECK-NEXT:    retq
   %t0 = load i32, i32* @c, align 4
Index: test/CodeGen/X86/pull-binop-through-shift.ll
===================================================================
--- test/CodeGen/X86/pull-binop-through-shift.ll
+++ test/CodeGen/X86/pull-binop-through-shift.ll
@@ -195,10 +195,9 @@
 ;
 ; X32-LABEL: and_signbit_lshr:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    shll $16, %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    shrl $8, %eax
+; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    shll $8, %eax
 ; X32-NEXT:    movl %eax, (%ecx)
 ; X32-NEXT:    retl
   %t0 = and i32 %x, 4294901760 ; 0xFFFF0000
Index: test/CodeGen/X86/rotate-extract-vector.ll
===================================================================
--- test/CodeGen/X86/rotate-extract-vector.ll
+++ test/CodeGen/X86/rotate-extract-vector.ll
@@ -155,13 +155,22 @@
 ; Result would undershift
 define <4 x i64> @no_extract_shl(<4 x i64> %i) nounwind {
-; CHECK-LABEL: no_extract_shl:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpsllq $11, %ymm0, %ymm1
-; CHECK-NEXT:    vpsllq $24, %ymm0, %ymm0
-; CHECK-NEXT:    vpsrlq $50, %ymm1, %ymm1
-; CHECK-NEXT:    vpor %ymm0, %ymm1, %ymm0
-; CHECK-NEXT:    ret{{[l|q]}}
+; X86-LABEL: no_extract_shl:
+; X86:       # %bb.0:
+; X86-NEXT:    vpsrlq $39, %ymm0, %ymm1
+; X86-NEXT:    vpand {{\.LCPI.*}}, %ymm1, %ymm1
+; X86-NEXT:    vpsllq $24, %ymm0, %ymm0
+; X86-NEXT:    vpor %ymm0, %ymm1, %ymm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: no_extract_shl:
+; X64:       # %bb.0:
+; X64-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [16383,16383,16383,16383]
+; X64-NEXT:    vpsrlq $39, %ymm0, %ymm2
+; X64-NEXT:    vpand %ymm1, %ymm2, %ymm1
+; X64-NEXT:    vpsllq $24, %ymm0, %ymm0
+; X64-NEXT:    vpor %ymm0, %ymm1, %ymm0
+; X64-NEXT:    retq
   %lhs_mul = shl <4 x i64> %i,
   %rhs_mul = shl <4 x i64> %i,
   %lhs_shift = lshr <4 x i64> %lhs_mul,
Index: test/CodeGen/X86/rotate-extract.ll
===================================================================
--- test/CodeGen/X86/rotate-extract.ll
+++ test/CodeGen/X86/rotate-extract.ll
@@ -12,12 +12,13 @@
 define i64 @rolq_extract_shl(i64 %i) nounwind {
 ; X86-LABEL: rolq_extract_shl:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    leal (,%edx,8), %eax
-; X86-NEXT:    shldl $10, %ecx, %edx
-; X86-NEXT:    shll $10, %ecx
-; X86-NEXT:    shrl $25, %eax
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    shrl $22, %ecx
+; X86-NEXT:    andl $127, %ecx
+; X86-NEXT:    shldl $10, %eax, %edx
+; X86-NEXT:    shll $10, %eax
 ; X86-NEXT:    orl %ecx, %eax
 ; X86-NEXT:    retl
 ;
@@ -140,23 +141,23 @@
 define i64 @no_extract_shl(i64 %i) nounwind {
 ; X86-LABEL: no_extract_shl:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    shll $5, %eax
-; X86-NEXT:    shldl $10, %ecx, %edx
-; X86-NEXT:    shll $10, %ecx
-; X86-NEXT:    shrl $25, %eax
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    shrl $20, %ecx
+; X86-NEXT:    andl $127, %ecx
+; X86-NEXT:    shldl $10, %eax, %edx
+; X86-NEXT:    shll $10, %eax
 ; X86-NEXT:    orl %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: no_extract_shl:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shlq $5, %rax
+; X64-NEXT:    shrq $52, %rax
+; X64-NEXT:    andl $127, %eax
 ; X64-NEXT:    shlq $10, %rdi
-; X64-NEXT:    shrq $57, %rax
-; X64-NEXT:    leaq (%rax,%rdi), %rax
+; X64-NEXT:    leaq (%rdi,%rax), %rax
 ; X64-NEXT:    retq
   %lhs_mul = shl i64 %i, 5
   %rhs_mul = shl i64 %i, 10
Index: test/CodeGen/X86/shift-mask.ll
===================================================================
--- test/CodeGen/X86/shift-mask.ll
+++ test/CodeGen/X86/shift-mask.ll
@@ -337,17 +337,25 @@
 ; X86-LABEL: test_i8_lshr_lshr_1:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-NEXT:    shlb $3, %al
-; X86-NEXT:    shrb $5, %al
+; X86-NEXT:    shrb $2, %al
+; X86-NEXT:    andb $7, %al
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_i8_lshr_lshr_1:
-; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    leal (,%rdi,8), %eax
-; X64-NEXT:    shrb $5, %al
-; X64-NEXT:    # kill: def $al killed $al killed $eax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i8_lshr_lshr_1:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    movl %edi, %eax
+; X64-MASK-NEXT:    shrb $2, %al
+; X64-MASK-NEXT:    andb $7, %al
+; X64-MASK-NEXT:    # kill: def $al killed $al killed $eax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i8_lshr_lshr_1:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-SHIFT-NEXT:    leal (,%rdi,8), %eax
+; X64-SHIFT-NEXT:    shrb $5, %al
+; X64-SHIFT-NEXT:    # kill: def $al killed $al killed $eax
+; X64-SHIFT-NEXT:    retq
   %1 = shl i8 %a0, 3
   %2 = lshr i8 %1, 5
   ret i8 %2
@@ -357,17 +365,25 @@
 ; X86-LABEL: test_i8_lshr_lshr_2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-NEXT:    shlb $5, %al
-; X86-NEXT:    shrb $3, %al
+; X86-NEXT:    shlb $2, %al
+; X86-NEXT:    andb $28, %al
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_i8_lshr_lshr_2:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shlb $5, %al
-; X64-NEXT:    shrb $3, %al
-; X64-NEXT:    # kill: def $al killed $al killed $eax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i8_lshr_lshr_2:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-MASK-NEXT:    leal (,%rdi,4), %eax
+; X64-MASK-NEXT:    andb $28, %al
+; X64-MASK-NEXT:    # kill: def $al killed $al killed $eax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i8_lshr_lshr_2:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    movl %edi, %eax
+; X64-SHIFT-NEXT:    shlb $5, %al
+; X64-SHIFT-NEXT:    shrb $3, %al
+; X64-SHIFT-NEXT:    # kill: def $al killed $al killed $eax
+; X64-SHIFT-NEXT:    retq
   %1 = shl i8 %a0, 5
   %2 = lshr i8 %1, 3
   ret i8 %2
@@ -476,16 +492,23 @@
 ; X86-LABEL: test_i32_lshr_lshr_1:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shll $3, %eax
-; X86-NEXT:    shrl $5, %eax
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    andl $134217727, %eax # imm = 0x7FFFFFF
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_i32_lshr_lshr_1:
-; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    leal (,%rdi,8), %eax
-; X64-NEXT:    shrl $5, %eax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i32_lshr_lshr_1:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    movl %edi, %eax
+; X64-MASK-NEXT:    shrl $2, %eax
+; X64-MASK-NEXT:    andl $134217727, %eax # imm = 0x7FFFFFF
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i32_lshr_lshr_1:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-SHIFT-NEXT:    leal (,%rdi,8), %eax
+; X64-SHIFT-NEXT:    shrl $5, %eax
+; X64-SHIFT-NEXT:    retq
   %1 = shl i32 %a0, 3
   %2 = lshr i32 %1, 5
   ret i32 %2
@@ -495,16 +518,23 @@
 ; X86-LABEL: test_i32_lshr_lshr_2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shll $5, %eax
-; X86-NEXT:    shrl $3, %eax
+; X86-NEXT:    shll $2, %eax
+; X86-NEXT:    andl $536870908, %eax # imm = 0x1FFFFFFC
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_i32_lshr_lshr_2:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shll $5, %eax
-; X64-NEXT:    shrl $3, %eax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i32_lshr_lshr_2:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-MASK-NEXT:    leal (,%rdi,4), %eax
+; X64-MASK-NEXT:    andl $536870908, %eax # imm = 0x1FFFFFFC
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i32_lshr_lshr_2:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    movl %edi, %eax
+; X64-SHIFT-NEXT:    shll $5, %eax
+; X64-SHIFT-NEXT:    shrl $3, %eax
+; X64-SHIFT-NEXT:    retq
   %1 = shl i32 %a0, 5
   %2 = lshr i32 %1, 3
   ret i32 %2
@@ -556,17 +586,23 @@
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    shldl $3, %eax, %edx
-; X86-NEXT:    shll $3, %eax
-; X86-NEXT:    shrdl $5, %edx, %eax
-; X86-NEXT:    shrl $5, %edx
+; X86-NEXT:    shrdl $2, %edx, %eax
+; X86-NEXT:    shrl $2, %edx
+; X86-NEXT:    andl $134217727, %edx # imm = 0x7FFFFFF
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_i64_lshr_lshr_1:
-; X64:       # %bb.0:
-; X64-NEXT:    leaq (,%rdi,8), %rax
-; X64-NEXT:    shrq $5, %rax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i64_lshr_lshr_1:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    shrq $2, %rdi
+; X64-MASK-NEXT:    movabsq $576460752303423487, %rax # imm = 0x7FFFFFFFFFFFFFF
+; X64-MASK-NEXT:    andq %rdi, %rax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i64_lshr_lshr_1:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    leaq (,%rdi,8), %rax
+; X64-SHIFT-NEXT:    shrq $5, %rax
+; X64-SHIFT-NEXT:    retq
   %1 = shl i64 %a0, 3
   %2 = lshr i64 %1, 5
   ret i64 %2
@@ -577,18 +613,24 @@
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    shldl $5, %eax, %edx
-; X86-NEXT:    shll $5, %eax
-; X86-NEXT:    shrdl $3, %edx, %eax
-; X86-NEXT:    shrl $3, %edx
+; X86-NEXT:    shldl $2, %eax, %edx
+; X86-NEXT:    andl $536870911, %edx # imm = 0x1FFFFFFF
+; X86-NEXT:    shll $2, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_i64_lshr_lshr_2:
-; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shlq $5, %rax
-; X64-NEXT:    shrq $3, %rax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i64_lshr_lshr_2:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    leaq (,%rdi,4), %rcx
+; X64-MASK-NEXT:    movabsq $2305843009213693948, %rax # imm = 0x1FFFFFFFFFFFFFFC
+; X64-MASK-NEXT:    andq %rcx, %rax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i64_lshr_lshr_2:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    movq %rdi, %rax
+; X64-SHIFT-NEXT:    shlq $5, %rax
+; X64-SHIFT-NEXT:    shrq $3, %rax
+; X64-SHIFT-NEXT:    retq
   %1 = shl i64 %a0, 5
   %2 = lshr i64 %1, 3
   ret i64 %2
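
Note on the DAGCombiner fold above: it rewrites (srl (shl x, c1), c2) as a single shift by the difference of the two amounts followed by an AND, which is what the updated checks reflect (e.g. for i8, (x << 3) >> 5 becomes (x >> 2) & 7). The following is a minimal standalone C++ sketch of the same mask computation for 32-bit values, added here purely as illustration; fold_shl_srl is a hypothetical name, not LLVM code, and shift amounts are assumed in range, mirroring the ult(OpSizeInBits) guard in the patch.

#include <cassert>
#include <cstdint>

// (x << c1) >> c2 rewritten as one shift plus an AND mask.
// The mask starts as the low (32 - c1) bits and is shifted the same way x is.
static uint32_t fold_shl_srl(uint32_t x, unsigned c1, unsigned c2) {
  uint32_t Mask = ~0u >> c1;                       // low (32 - c1) bits set
  if (c2 > c1)
    return (x >> (c2 - c1)) & (Mask >> (c2 - c1)); // net right shift
  return (x << (c1 - c2)) & (Mask << (c1 - c2));   // net left shift
}

int main() {
  // Exhaustively compare against the naive double shift for in-range amounts.
  const uint32_t vals[] = {0u, 1u, 0x12345678u, 0xDEADBEEFu, ~0u};
  for (uint32_t x : vals)
    for (unsigned c1 = 0; c1 < 32; ++c1)
      for (unsigned c2 = 0; c2 < 32; ++c2)
        assert(fold_shl_srl(x, c1, c2) == ((x << c1) >> c2));
  return 0;
}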