Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -35290,9 +35290,16 @@ return (VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())); } -// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI. -static SDValue combineAndIntoBEXTR(SDNode *Node, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { +// Look for the pattern: +// (X l>> C1) & C2 or (X l>> C1) & (C2 << C3) +// Where C2 is all-ones in the low bits, and C3 might be zero, +// and transform it into: +// ((X l>> (C1 + C3)) & C2) << C3 +// Which is combined into: +// (bextr X, ((popcnt(C2) << 8) | (C1 + C3))) << C3 +// The last `<< C3` shift only exists if C3 is not zero. +static SDValue combineShiftAndIntoBEXTR(SDNode *Node, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { EVT NVT = Node->getValueType(0); SDLoc dl(Node); @@ -35317,17 +35324,19 @@ if (!MaskCst || !ShiftCst) return SDValue(); - // And RHS must be a mask. + // And RHS must be a (potentially shifted) mask. uint64_t Mask = MaskCst->getZExtValue(); - if (!isMask_64(Mask)) + if (!isShiftedMask_64(Mask)) return SDValue(); - uint64_t Shift = ShiftCst->getZExtValue(); uint64_t MaskSize = countPopulation(Mask); + uint64_t ZeroPaddingSize = countTrailingZeros(Mask); + + uint64_t Shift = ZeroPaddingSize + ShiftCst->getZExtValue(); // Don't interfere with something that can be handled by extracting AH. // TODO: If we are able to fold a load, BEXTR might still be better than AH. - if (Shift == 8 && MaskSize == 8) + if (Shift == 8 && MaskSize == 8 && ZeroPaddingSize == 0) return SDValue(); // Make sure we are only using bits that were in the original value, not @@ -35338,6 +35347,13 @@ // Create a BEXTR node. 
SDValue C = DAG.getConstant(Shift | (MaskSize << 8), dl, NVT); SDValue New = DAG.getNode(X86ISD::BEXTR, dl, NVT, N0->getOperand(0), C); + + // If the mask had some zero low bits, we need to re-introduce them. + if (ZeroPaddingSize > 0) { + SDValue Cp = DAG.getConstant(ZeroPaddingSize, dl, MVT::i8); + New = DAG.getNode(ISD::SHL, dl, NVT, New, Cp); + } + return New; } @@ -35442,7 +35458,7 @@ if (DCI.isBeforeLegalizeOps()) return SDValue(); - if (SDValue R = combineAndIntoBEXTR(N, DAG, Subtarget)) + if (SDValue R = combineShiftAndIntoBEXTR(N, DAG, Subtarget)) return R; if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget)) Index: test/CodeGen/X86/bmi-x86_64.ll =================================================================== --- test/CodeGen/X86/bmi-x86_64.ll +++ test/CodeGen/X86/bmi-x86_64.ll @@ -103,12 +103,19 @@ } define i64 @non_bextr64(i64 %x) { -; CHECK-LABEL: non_bextr64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: shrq $2, %rdi -; CHECK-NEXT: movabsq $8589934590, %rax # imm = 0x1FFFFFFFE -; CHECK-NEXT: andq %rdi, %rax -; CHECK-NEXT: retq +; BEXTR-SLOW-LABEL: non_bextr64: +; BEXTR-SLOW: # %bb.0: # %entry +; BEXTR-SLOW-NEXT: shrq $2, %rdi +; BEXTR-SLOW-NEXT: movabsq $8589934590, %rax # imm = 0x1FFFFFFFE +; BEXTR-SLOW-NEXT: andq %rdi, %rax +; BEXTR-SLOW-NEXT: retq +; +; BEXTR-FAST-LABEL: non_bextr64: +; BEXTR-FAST: # %bb.0: # %entry +; BEXTR-FAST-NEXT: movl $8195, %eax # imm = 0x2003 +; BEXTR-FAST-NEXT: bextrq %rax, %rdi, %rax +; BEXTR-FAST-NEXT: addq %rax, %rax +; BEXTR-FAST-NEXT: retq entry: %shr = lshr i64 %x, 2 %and = and i64 %shr, 8589934590 Index: test/CodeGen/X86/extract-bits.ll =================================================================== --- test/CodeGen/X86/extract-bits.ll +++ test/CodeGen/X86/extract-bits.ll @@ -5615,23 +5615,69 @@ ; https://bugs.llvm.org/show_bug.cgi?id=38938 define void @pr38938(i32* %a0, i64* %a1) { -; X86-LABEL: pr38938: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 
-; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: shrl $19, %ecx -; X86-NEXT: andl $4092, %ecx # imm = 0xFFC -; X86-NEXT: incl (%eax,%ecx) -; X86-NEXT: retl +; X86-NOBMI-LABEL: pr38938: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl (%ecx), %ecx +; X86-NOBMI-NEXT: shrl $19, %ecx +; X86-NOBMI-NEXT: andl $4092, %ecx # imm = 0xFFC +; X86-NOBMI-NEXT: incl (%eax,%ecx) +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: pr38938: -; X64: # %bb.0: -; X64-NEXT: movq (%rsi), %rax -; X64-NEXT: shrq $19, %rax -; X64-NEXT: andl $4092, %eax # imm = 0xFFC -; X64-NEXT: incl (%rdi,%rax) -; X64-NEXT: retq +; X86-BMI1NOTBM-LABEL: pr38938: +; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI1NOTBM-NEXT: movl $2581, %edx # imm = 0xA15 +; X86-BMI1NOTBM-NEXT: bextrl %edx, (%ecx), %ecx +; X86-BMI1NOTBM-NEXT: incl (%eax,%ecx,4) +; X86-BMI1NOTBM-NEXT: retl +; +; X86-BMI1TBM-LABEL: pr38938: +; X86-BMI1TBM: # %bb.0: +; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI1TBM-NEXT: bextrl $2581, (%ecx), %ecx # imm = 0xA15 +; X86-BMI1TBM-NEXT: incl (%eax,%ecx,4) +; X86-BMI1TBM-NEXT: retl +; +; X86-BMI1NOTBMBMI2-LABEL: pr38938: +; X86-BMI1NOTBMBMI2: # %bb.0: +; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI1NOTBMBMI2-NEXT: movl $2581, %edx # imm = 0xA15 +; X86-BMI1NOTBMBMI2-NEXT: bextrl %edx, (%ecx), %ecx +; X86-BMI1NOTBMBMI2-NEXT: incl (%eax,%ecx,4) +; X86-BMI1NOTBMBMI2-NEXT: retl +; +; X64-NOBMI-LABEL: pr38938: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq (%rsi), %rax +; X64-NOBMI-NEXT: shrq $19, %rax +; X64-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC +; X64-NOBMI-NEXT: incl (%rdi,%rax) +; X64-NOBMI-NEXT: retq +; +; X64-BMI1NOTBM-LABEL: pr38938: +; X64-BMI1NOTBM: # %bb.0: +; X64-BMI1NOTBM-NEXT: movl $2581, %eax 
# imm = 0xA15 +; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rsi), %rax +; X64-BMI1NOTBM-NEXT: incl (%rdi,%rax,4) +; X64-BMI1NOTBM-NEXT: retq +; +; X64-BMI1TBM-LABEL: pr38938: +; X64-BMI1TBM: # %bb.0: +; X64-BMI1TBM-NEXT: bextrq $2581, (%rsi), %rax # imm = 0xA15 +; X64-BMI1TBM-NEXT: incl (%rdi,%rax,4) +; X64-BMI1TBM-NEXT: retq +; +; X64-BMI1NOTBMBMI2-LABEL: pr38938: +; X64-BMI1NOTBMBMI2: # %bb.0: +; X64-BMI1NOTBMBMI2-NEXT: movl $2581, %eax # imm = 0xA15 +; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, (%rsi), %rax +; X64-BMI1NOTBMBMI2-NEXT: incl (%rdi,%rax,4) +; X64-BMI1NOTBMBMI2-NEXT: retq %tmp = load i64, i64* %a1, align 8 %tmp1 = lshr i64 %tmp, 21 %tmp2 = and i64 %tmp1, 1023 @@ -5698,19 +5744,59 @@ ; Should be still fine, but the mask is shifted define i32 @c1_i32(i32 %arg) { -; X86-LABEL: c1_i32: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shrl $19, %eax -; X86-NEXT: andl $4092, %eax # imm = 0xFFC -; X86-NEXT: retl +; X86-NOBMI-LABEL: c1_i32: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: shrl $19, %eax +; X86-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: c1_i32: -; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: shrl $19, %eax -; X64-NEXT: andl $4092, %eax # imm = 0xFFC -; X64-NEXT: retq +; X86-BMI1NOTBM-LABEL: c1_i32: +; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: movl $2581, %eax # imm = 0xA15 +; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: shll $2, %eax +; X86-BMI1NOTBM-NEXT: retl +; +; X86-BMI1TBM-LABEL: c1_i32: +; X86-BMI1TBM: # %bb.0: +; X86-BMI1TBM-NEXT: bextrl $2581, {{[0-9]+}}(%esp), %eax # imm = 0xA15 +; X86-BMI1TBM-NEXT: shll $2, %eax +; X86-BMI1TBM-NEXT: retl +; +; X86-BMI1NOTBMBMI2-LABEL: c1_i32: +; X86-BMI1NOTBMBMI2: # %bb.0: +; X86-BMI1NOTBMBMI2-NEXT: movl $2581, %eax # imm = 0xA15 +; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBMBMI2-NEXT: shll $2, %eax +; X86-BMI1NOTBMBMI2-NEXT: retl 
+; +; X64-NOBMI-LABEL: c1_i32: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edi, %eax +; X64-NOBMI-NEXT: shrl $19, %eax +; X64-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC +; X64-NOBMI-NEXT: retq +; +; X64-BMI1NOTBM-LABEL: c1_i32: +; X64-BMI1NOTBM: # %bb.0: +; X64-BMI1NOTBM-NEXT: movl $2581, %eax # imm = 0xA15 +; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1NOTBM-NEXT: shll $2, %eax +; X64-BMI1NOTBM-NEXT: retq +; +; X64-BMI1TBM-LABEL: c1_i32: +; X64-BMI1TBM: # %bb.0: +; X64-BMI1TBM-NEXT: bextrl $2581, %edi, %eax # imm = 0xA15 +; X64-BMI1TBM-NEXT: shll $2, %eax +; X64-BMI1TBM-NEXT: retq +; +; X64-BMI1NOTBMBMI2-LABEL: c1_i32: +; X64-BMI1NOTBMBMI2: # %bb.0: +; X64-BMI1NOTBMBMI2-NEXT: movl $2581, %eax # imm = 0xA15 +; X64-BMI1NOTBMBMI2-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1NOTBMBMI2-NEXT: shll $2, %eax +; X64-BMI1NOTBMBMI2-NEXT: retq %tmp0 = lshr i32 %arg, 19 %tmp1 = and i32 %tmp0, 4092 ret i32 %tmp1 @@ -5718,19 +5804,59 @@ ; Should be still fine, but the result is shifted left afterwards define i32 @c2_i32(i32 %arg) { -; X86-LABEL: c2_i32: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shrl $17, %eax -; X86-NEXT: andl $4092, %eax # imm = 0xFFC -; X86-NEXT: retl +; X86-NOBMI-LABEL: c2_i32: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: shrl $17, %eax +; X86-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: c2_i32: -; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: shrl $17, %eax -; X64-NEXT: andl $4092, %eax # imm = 0xFFC -; X64-NEXT: retq +; X86-BMI1NOTBM-LABEL: c2_i32: +; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13 +; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: shll $2, %eax +; X86-BMI1NOTBM-NEXT: retl +; +; X86-BMI1TBM-LABEL: c2_i32: +; X86-BMI1TBM: # %bb.0: +; X86-BMI1TBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13 +; X86-BMI1TBM-NEXT: shll $2, %eax +; 
X86-BMI1TBM-NEXT: retl +; +; X86-BMI1NOTBMBMI2-LABEL: c2_i32: +; X86-BMI1NOTBMBMI2: # %bb.0: +; X86-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13 +; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBMBMI2-NEXT: shll $2, %eax +; X86-BMI1NOTBMBMI2-NEXT: retl +; +; X64-NOBMI-LABEL: c2_i32: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edi, %eax +; X64-NOBMI-NEXT: shrl $17, %eax +; X64-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC +; X64-NOBMI-NEXT: retq +; +; X64-BMI1NOTBM-LABEL: c2_i32: +; X64-BMI1NOTBM: # %bb.0: +; X64-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13 +; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1NOTBM-NEXT: shll $2, %eax +; X64-BMI1NOTBM-NEXT: retq +; +; X64-BMI1TBM-LABEL: c2_i32: +; X64-BMI1TBM: # %bb.0: +; X64-BMI1TBM-NEXT: bextrl $2579, %edi, %eax # imm = 0xA13 +; X64-BMI1TBM-NEXT: shll $2, %eax +; X64-BMI1TBM-NEXT: retq +; +; X64-BMI1NOTBMBMI2-LABEL: c2_i32: +; X64-BMI1NOTBMBMI2: # %bb.0: +; X64-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13 +; X64-BMI1NOTBMBMI2-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1NOTBMBMI2-NEXT: shll $2, %eax +; X64-BMI1NOTBMBMI2-NEXT: retq %tmp0 = lshr i32 %arg, 19 %tmp1 = and i32 %tmp0, 1023 %tmp2 = shl i32 %tmp1, 2 @@ -5739,19 +5865,59 @@ ; The mask covers newly shifted-in bit define i32 @c4_i32_bad(i32 %arg) { -; X86-LABEL: c4_i32_bad: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shrl $19, %eax -; X86-NEXT: andl $-2, %eax -; X86-NEXT: retl +; X86-NOBMI-LABEL: c4_i32_bad: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: shrl $19, %eax +; X86-NOBMI-NEXT: andl $-2, %eax +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: c4_i32_bad: -; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: shrl $19, %eax -; X64-NEXT: andl $-2, %eax -; X64-NEXT: retq +; X86-BMI1NOTBM-LABEL: c4_i32_bad: +; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: movl $3092, %eax # imm = 0xC14 +; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; 
X86-BMI1NOTBM-NEXT: addl %eax, %eax +; X86-BMI1NOTBM-NEXT: retl +; +; X86-BMI1TBM-LABEL: c4_i32_bad: +; X86-BMI1TBM: # %bb.0: +; X86-BMI1TBM-NEXT: bextrl $3092, {{[0-9]+}}(%esp), %eax # imm = 0xC14 +; X86-BMI1TBM-NEXT: addl %eax, %eax +; X86-BMI1TBM-NEXT: retl +; +; X86-BMI1NOTBMBMI2-LABEL: c4_i32_bad: +; X86-BMI1NOTBMBMI2: # %bb.0: +; X86-BMI1NOTBMBMI2-NEXT: movl $3092, %eax # imm = 0xC14 +; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBMBMI2-NEXT: addl %eax, %eax +; X86-BMI1NOTBMBMI2-NEXT: retl +; +; X64-NOBMI-LABEL: c4_i32_bad: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edi, %eax +; X64-NOBMI-NEXT: shrl $19, %eax +; X64-NOBMI-NEXT: andl $-2, %eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI1NOTBM-LABEL: c4_i32_bad: +; X64-BMI1NOTBM: # %bb.0: +; X64-BMI1NOTBM-NEXT: movl $3092, %eax # imm = 0xC14 +; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1NOTBM-NEXT: addl %eax, %eax +; X64-BMI1NOTBM-NEXT: retq +; +; X64-BMI1TBM-LABEL: c4_i32_bad: +; X64-BMI1TBM: # %bb.0: +; X64-BMI1TBM-NEXT: bextrl $3092, %edi, %eax # imm = 0xC14 +; X64-BMI1TBM-NEXT: addl %eax, %eax +; X64-BMI1TBM-NEXT: retq +; +; X64-BMI1NOTBMBMI2-LABEL: c4_i32_bad: +; X64-BMI1NOTBMBMI2: # %bb.0: +; X64-BMI1NOTBMBMI2-NEXT: movl $3092, %eax # imm = 0xC14 +; X64-BMI1NOTBMBMI2-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1NOTBMBMI2-NEXT: addl %eax, %eax +; X64-BMI1NOTBMBMI2-NEXT: retq %tmp0 = lshr i32 %arg, 19 %tmp1 = and i32 %tmp0, 16382 ret i32 %tmp1 @@ -5819,20 +5985,63 @@ ; Should be still fine, but the mask is shifted define i64 @c1_i64(i64 %arg) { -; X86-LABEL: c1_i64: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shrl $19, %eax -; X86-NEXT: andl $4092, %eax # imm = 0xFFC -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: retl +; X86-NOBMI-LABEL: c1_i64: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: shrl $19, %eax +; X86-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC +; X86-NOBMI-NEXT: xorl %edx, %edx +; X86-NOBMI-NEXT: 
retl ; -; X64-LABEL: c1_i64: -; X64: # %bb.0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shrq $51, %rax -; X64-NEXT: andl $4092, %eax # imm = 0xFFC -; X64-NEXT: retq +; X86-BMI1NOTBM-LABEL: c1_i64: +; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: movl $2581, %eax # imm = 0xA15 +; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: shll $2, %eax +; X86-BMI1NOTBM-NEXT: xorl %edx, %edx +; X86-BMI1NOTBM-NEXT: retl +; +; X86-BMI1TBM-LABEL: c1_i64: +; X86-BMI1TBM: # %bb.0: +; X86-BMI1TBM-NEXT: bextrl $2581, {{[0-9]+}}(%esp), %eax # imm = 0xA15 +; X86-BMI1TBM-NEXT: shll $2, %eax +; X86-BMI1TBM-NEXT: xorl %edx, %edx +; X86-BMI1TBM-NEXT: retl +; +; X86-BMI1NOTBMBMI2-LABEL: c1_i64: +; X86-BMI1NOTBMBMI2: # %bb.0: +; X86-BMI1NOTBMBMI2-NEXT: movl $2581, %eax # imm = 0xA15 +; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBMBMI2-NEXT: shll $2, %eax +; X86-BMI1NOTBMBMI2-NEXT: xorl %edx, %edx +; X86-BMI1NOTBMBMI2-NEXT: retl +; +; X64-NOBMI-LABEL: c1_i64: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rdi, %rax +; X64-NOBMI-NEXT: shrq $51, %rax +; X64-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC +; X64-NOBMI-NEXT: retq +; +; X64-BMI1NOTBM-LABEL: c1_i64: +; X64-BMI1NOTBM: # %bb.0: +; X64-BMI1NOTBM-NEXT: movl $2613, %eax # imm = 0xA35 +; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1NOTBM-NEXT: shlq $2, %rax +; X64-BMI1NOTBM-NEXT: retq +; +; X64-BMI1TBM-LABEL: c1_i64: +; X64-BMI1TBM: # %bb.0: +; X64-BMI1TBM-NEXT: bextrq $2613, %rdi, %rax # imm = 0xA35 +; X64-BMI1TBM-NEXT: shlq $2, %rax +; X64-BMI1TBM-NEXT: retq +; +; X64-BMI1NOTBMBMI2-LABEL: c1_i64: +; X64-BMI1NOTBMBMI2: # %bb.0: +; X64-BMI1NOTBMBMI2-NEXT: movl $2613, %eax # imm = 0xA35 +; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1NOTBMBMI2-NEXT: shlq $2, %rax +; X64-BMI1NOTBMBMI2-NEXT: retq %tmp0 = lshr i64 %arg, 51 %tmp1 = and i64 %tmp0, 4092 ret i64 %tmp1 @@ -5840,20 +6049,63 @@ ; Should be still fine, but the result is shifted left afterwards define 
i64 @c2_i64(i64 %arg) { -; X86-LABEL: c2_i64: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shrl $17, %eax -; X86-NEXT: andl $4092, %eax # imm = 0xFFC -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: retl +; X86-NOBMI-LABEL: c2_i64: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: shrl $17, %eax +; X86-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC +; X86-NOBMI-NEXT: xorl %edx, %edx +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: c2_i64: -; X64: # %bb.0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shrq $49, %rax -; X64-NEXT: andl $4092, %eax # imm = 0xFFC -; X64-NEXT: retq +; X86-BMI1NOTBM-LABEL: c2_i64: +; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13 +; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: shll $2, %eax +; X86-BMI1NOTBM-NEXT: xorl %edx, %edx +; X86-BMI1NOTBM-NEXT: retl +; +; X86-BMI1TBM-LABEL: c2_i64: +; X86-BMI1TBM: # %bb.0: +; X86-BMI1TBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13 +; X86-BMI1TBM-NEXT: shll $2, %eax +; X86-BMI1TBM-NEXT: xorl %edx, %edx +; X86-BMI1TBM-NEXT: retl +; +; X86-BMI1NOTBMBMI2-LABEL: c2_i64: +; X86-BMI1NOTBMBMI2: # %bb.0: +; X86-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13 +; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBMBMI2-NEXT: shll $2, %eax +; X86-BMI1NOTBMBMI2-NEXT: xorl %edx, %edx +; X86-BMI1NOTBMBMI2-NEXT: retl +; +; X64-NOBMI-LABEL: c2_i64: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rdi, %rax +; X64-NOBMI-NEXT: shrq $49, %rax +; X64-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC +; X64-NOBMI-NEXT: retq +; +; X64-BMI1NOTBM-LABEL: c2_i64: +; X64-BMI1NOTBM: # %bb.0: +; X64-BMI1NOTBM-NEXT: movl $2611, %eax # imm = 0xA33 +; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1NOTBM-NEXT: shlq $2, %rax +; X64-BMI1NOTBM-NEXT: retq +; +; X64-BMI1TBM-LABEL: c2_i64: +; X64-BMI1TBM: # %bb.0: +; X64-BMI1TBM-NEXT: bextrq $2611, %rdi, %rax # imm = 0xA33 +; X64-BMI1TBM-NEXT: 
shlq $2, %rax +; X64-BMI1TBM-NEXT: retq +; +; X64-BMI1NOTBMBMI2-LABEL: c2_i64: +; X64-BMI1NOTBMBMI2: # %bb.0: +; X64-BMI1NOTBMBMI2-NEXT: movl $2611, %eax # imm = 0xA33 +; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1NOTBMBMI2-NEXT: shlq $2, %rax +; X64-BMI1NOTBMBMI2-NEXT: retq %tmp0 = lshr i64 %arg, 51 %tmp1 = and i64 %tmp0, 1023 %tmp2 = shl i64 %tmp1, 2 @@ -5862,20 +6114,63 @@ ; The mask covers newly shifted-in bit define i64 @c4_i64_bad(i64 %arg) { -; X86-LABEL: c4_i64_bad: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shrl $19, %eax -; X86-NEXT: andl $-2, %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: retl +; X86-NOBMI-LABEL: c4_i64_bad: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: shrl $19, %eax +; X86-NOBMI-NEXT: andl $-2, %eax +; X86-NOBMI-NEXT: xorl %edx, %edx +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: c4_i64_bad: -; X64: # %bb.0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shrq $51, %rax -; X64-NEXT: andl $-2, %eax -; X64-NEXT: retq +; X86-BMI1NOTBM-LABEL: c4_i64_bad: +; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: movl $3092, %eax # imm = 0xC14 +; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: addl %eax, %eax +; X86-BMI1NOTBM-NEXT: xorl %edx, %edx +; X86-BMI1NOTBM-NEXT: retl +; +; X86-BMI1TBM-LABEL: c4_i64_bad: +; X86-BMI1TBM: # %bb.0: +; X86-BMI1TBM-NEXT: bextrl $3092, {{[0-9]+}}(%esp), %eax # imm = 0xC14 +; X86-BMI1TBM-NEXT: addl %eax, %eax +; X86-BMI1TBM-NEXT: xorl %edx, %edx +; X86-BMI1TBM-NEXT: retl +; +; X86-BMI1NOTBMBMI2-LABEL: c4_i64_bad: +; X86-BMI1NOTBMBMI2: # %bb.0: +; X86-BMI1NOTBMBMI2-NEXT: movl $3092, %eax # imm = 0xC14 +; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBMBMI2-NEXT: addl %eax, %eax +; X86-BMI1NOTBMBMI2-NEXT: xorl %edx, %edx +; X86-BMI1NOTBMBMI2-NEXT: retl +; +; X64-NOBMI-LABEL: c4_i64_bad: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rdi, %rax +; X64-NOBMI-NEXT: shrq $51, %rax +; 
X64-NOBMI-NEXT: andl $-2, %eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI1NOTBM-LABEL: c4_i64_bad: +; X64-BMI1NOTBM: # %bb.0: +; X64-BMI1NOTBM-NEXT: movl $3124, %eax # imm = 0xC34 +; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1NOTBM-NEXT: addq %rax, %rax +; X64-BMI1NOTBM-NEXT: retq +; +; X64-BMI1TBM-LABEL: c4_i64_bad: +; X64-BMI1TBM: # %bb.0: +; X64-BMI1TBM-NEXT: bextrq $3124, %rdi, %rax # imm = 0xC34 +; X64-BMI1TBM-NEXT: addq %rax, %rax +; X64-BMI1TBM-NEXT: retq +; +; X64-BMI1NOTBMBMI2-LABEL: c4_i64_bad: +; X64-BMI1NOTBMBMI2: # %bb.0: +; X64-BMI1NOTBMBMI2-NEXT: movl $3124, %eax # imm = 0xC34 +; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1NOTBMBMI2-NEXT: addq %rax, %rax +; X64-BMI1NOTBMBMI2-NEXT: retq %tmp0 = lshr i64 %arg, 51 %tmp1 = and i64 %tmp0, 16382 ret i64 %tmp1 @@ -6021,21 +6316,70 @@ ; Should be still fine, but the result is shifted left afterwards define void @c7_i32(i32 %arg, i32* %ptr) { -; X86-LABEL: c7_i32: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: shrl $17, %ecx -; X86-NEXT: andl $4092, %ecx # imm = 0xFFC -; X86-NEXT: movl %ecx, (%eax) -; X86-NEXT: retl +; X86-NOBMI-LABEL: c7_i32: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: shrl $17, %ecx +; X86-NOBMI-NEXT: andl $4092, %ecx # imm = 0xFFC +; X86-NOBMI-NEXT: movl %ecx, (%eax) +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: c7_i32: -; X64: # %bb.0: -; X64-NEXT: shrl $17, %edi -; X64-NEXT: andl $4092, %edi # imm = 0xFFC -; X64-NEXT: movl %edi, (%rsi) -; X64-NEXT: retq +; X86-BMI1NOTBM-LABEL: c7_i32: +; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: movl $2579, %ecx # imm = 0xA13 +; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI1NOTBM-NEXT: shll $2, %ecx +; X86-BMI1NOTBM-NEXT: movl %ecx, (%eax) +; X86-BMI1NOTBM-NEXT: retl +; +; X86-BMI1TBM-LABEL: c7_i32: +; 
X86-BMI1TBM: # %bb.0: +; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1TBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %ecx # imm = 0xA13 +; X86-BMI1TBM-NEXT: shll $2, %ecx +; X86-BMI1TBM-NEXT: movl %ecx, (%eax) +; X86-BMI1TBM-NEXT: retl +; +; X86-BMI1NOTBMBMI2-LABEL: c7_i32: +; X86-BMI1NOTBMBMI2: # %bb.0: +; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBMBMI2-NEXT: movl $2579, %ecx # imm = 0xA13 +; X86-BMI1NOTBMBMI2-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI1NOTBMBMI2-NEXT: shll $2, %ecx +; X86-BMI1NOTBMBMI2-NEXT: movl %ecx, (%eax) +; X86-BMI1NOTBMBMI2-NEXT: retl +; +; X64-NOBMI-LABEL: c7_i32: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: shrl $17, %edi +; X64-NOBMI-NEXT: andl $4092, %edi # imm = 0xFFC +; X64-NOBMI-NEXT: movl %edi, (%rsi) +; X64-NOBMI-NEXT: retq +; +; X64-BMI1NOTBM-LABEL: c7_i32: +; X64-BMI1NOTBM: # %bb.0: +; X64-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13 +; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1NOTBM-NEXT: shll $2, %eax +; X64-BMI1NOTBM-NEXT: movl %eax, (%rsi) +; X64-BMI1NOTBM-NEXT: retq +; +; X64-BMI1TBM-LABEL: c7_i32: +; X64-BMI1TBM: # %bb.0: +; X64-BMI1TBM-NEXT: bextrl $2579, %edi, %eax # imm = 0xA13 +; X64-BMI1TBM-NEXT: shll $2, %eax +; X64-BMI1TBM-NEXT: movl %eax, (%rsi) +; X64-BMI1TBM-NEXT: retq +; +; X64-BMI1NOTBMBMI2-LABEL: c7_i32: +; X64-BMI1NOTBMBMI2: # %bb.0: +; X64-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13 +; X64-BMI1NOTBMBMI2-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1NOTBMBMI2-NEXT: shll $2, %eax +; X64-BMI1NOTBMBMI2-NEXT: movl %eax, (%rsi) +; X64-BMI1NOTBMBMI2-NEXT: retq %tmp0 = lshr i32 %arg, 19 %tmp1 = and i32 %tmp0, 1023 %tmp2 = shl i32 %tmp1, 2 @@ -6187,22 +6531,74 @@ ; Should be still fine, but the result is shifted left afterwards define void @c7_i64(i64 %arg, i64* %ptr) { -; X86-LABEL: c7_i64: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: shrl $17, %ecx -; X86-NEXT: andl $4092, %ecx # imm = 0xFFC -; 
X86-NEXT: movl %ecx, (%eax) -; X86-NEXT: movl $0, 4(%eax) -; X86-NEXT: retl +; X86-NOBMI-LABEL: c7_i64: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: shrl $17, %ecx +; X86-NOBMI-NEXT: andl $4092, %ecx # imm = 0xFFC +; X86-NOBMI-NEXT: movl %ecx, (%eax) +; X86-NOBMI-NEXT: movl $0, 4(%eax) +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: c7_i64: -; X64: # %bb.0: -; X64-NEXT: shrq $49, %rdi -; X64-NEXT: andl $4092, %edi # imm = 0xFFC -; X64-NEXT: movq %rdi, (%rsi) -; X64-NEXT: retq +; X86-BMI1NOTBM-LABEL: c7_i64: +; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: movl $2579, %ecx # imm = 0xA13 +; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI1NOTBM-NEXT: shll $2, %ecx +; X86-BMI1NOTBM-NEXT: movl %ecx, (%eax) +; X86-BMI1NOTBM-NEXT: movl $0, 4(%eax) +; X86-BMI1NOTBM-NEXT: retl +; +; X86-BMI1TBM-LABEL: c7_i64: +; X86-BMI1TBM: # %bb.0: +; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1TBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %ecx # imm = 0xA13 +; X86-BMI1TBM-NEXT: shll $2, %ecx +; X86-BMI1TBM-NEXT: movl %ecx, (%eax) +; X86-BMI1TBM-NEXT: movl $0, 4(%eax) +; X86-BMI1TBM-NEXT: retl +; +; X86-BMI1NOTBMBMI2-LABEL: c7_i64: +; X86-BMI1NOTBMBMI2: # %bb.0: +; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBMBMI2-NEXT: movl $2579, %ecx # imm = 0xA13 +; X86-BMI1NOTBMBMI2-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI1NOTBMBMI2-NEXT: shll $2, %ecx +; X86-BMI1NOTBMBMI2-NEXT: movl %ecx, (%eax) +; X86-BMI1NOTBMBMI2-NEXT: movl $0, 4(%eax) +; X86-BMI1NOTBMBMI2-NEXT: retl +; +; X64-NOBMI-LABEL: c7_i64: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: shrq $49, %rdi +; X64-NOBMI-NEXT: andl $4092, %edi # imm = 0xFFC +; X64-NOBMI-NEXT: movq %rdi, (%rsi) +; X64-NOBMI-NEXT: retq +; +; X64-BMI1NOTBM-LABEL: c7_i64: +; X64-BMI1NOTBM: # %bb.0: +; X64-BMI1NOTBM-NEXT: movl $2611, %eax # imm = 0xA33 +; X64-BMI1NOTBM-NEXT: 
bextrq %rax, %rdi, %rax +; X64-BMI1NOTBM-NEXT: shlq $2, %rax +; X64-BMI1NOTBM-NEXT: movq %rax, (%rsi) +; X64-BMI1NOTBM-NEXT: retq +; +; X64-BMI1TBM-LABEL: c7_i64: +; X64-BMI1TBM: # %bb.0: +; X64-BMI1TBM-NEXT: bextrq $2611, %rdi, %rax # imm = 0xA33 +; X64-BMI1TBM-NEXT: shlq $2, %rax +; X64-BMI1TBM-NEXT: movq %rax, (%rsi) +; X64-BMI1TBM-NEXT: retq +; +; X64-BMI1NOTBMBMI2-LABEL: c7_i64: +; X64-BMI1NOTBMBMI2: # %bb.0: +; X64-BMI1NOTBMBMI2-NEXT: movl $2611, %eax # imm = 0xA33 +; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1NOTBMBMI2-NEXT: shlq $2, %rax +; X64-BMI1NOTBMBMI2-NEXT: movq %rax, (%rsi) +; X64-BMI1NOTBMBMI2-NEXT: retq %tmp0 = lshr i64 %arg, 51 %tmp1 = and i64 %tmp0, 1023 %tmp2 = shl i64 %tmp1, 2