Index: include/llvm/CodeGen/TargetLowering.h
===================================================================
--- include/llvm/CodeGen/TargetLowering.h
+++ include/llvm/CodeGen/TargetLowering.h
@@ -2985,17 +2985,39 @@
   ///
   virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
+  // Returns true if the target has a pattern
+  // to extract N contiguous bits starting at bit M.
+  // On x86 that could be BMI1's BEXTR; on AArch64 it is UBFX.
+  virtual bool haveBitFieldExtractPattern(EVT VT) const { return false; }
+
+  // True if N is (X >> C1) & C2 where C2 is a mask with all-ones in the low bits.
+  virtual bool isBitFieldExtractPattern(const SDNode *N) const {
+    if (!haveBitFieldExtractPattern(N->getValueType(0)))
+      return false;
+    if (N->getOpcode() != ISD::AND)
+      return false;
+    const auto *AndCstMask = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (!AndCstMask)
+      return false;
+    SDValue N0 = N->getOperand(0);
+    if (N0.getOpcode() != ISD::SRL || !isa<ConstantSDNode>(N0.getOperand(1)))
+      return false;
+    // Ok, if the mask is all-ones in the low bits, then this is a 'bit field extract'.
+    return AndCstMask->getAPIntValue().isMask();
+  }
+
   /// Return true if it is profitable to move this shift by a constant amount
   /// though its operand, adjusting any immediate operands as necessary to
   /// preserve semantics. This transformation may not be desirable if it
-  /// disrupts a particularly auspicious target-specific tree (e.g. bitfield
-  /// extraction in AArch64). By default, it returns true.
+  /// disrupts a particularly auspicious target-specific tree (e.g. bitfield
+  /// extraction in x86/AArch64).
   ///
   /// @param N the shift node
   /// @param Level the current DAGCombine legalization level.
   virtual bool isDesirableToCommuteWithShift(const SDNode *N,
                                              CombineLevel Level) const {
-    return true;
+    // Desirable only if it will not disturb a 'bit field extract' pattern.
+    return !isBitFieldExtractPattern(N->getOperand(0).getNode());
   }
 
   // Return true if it is profitable to combine a BUILD_VECTOR with a stride-pattern
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -363,9 +363,10 @@
 
   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
 
-  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
-  bool isDesirableToCommuteWithShift(const SDNode *N,
-                                     CombineLevel Level) const override;
+  bool haveBitFieldExtractPattern(EVT VT) const override {
+    // UBFX is valid for i32 and i64 types.
+    return VT == MVT::i32 || VT == MVT::i64;
+  }
 
   /// Returns true if it is beneficial to convert a load of a constant
   /// to just the constant itself.
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8539,24 +8539,6 @@
   return ScratchRegs;
 }
 
-bool
-AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
-                                                     CombineLevel Level) const {
-  N = N->getOperand(0).getNode();
-  EVT VT = N->getValueType(0);
-  // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
-  // it with shift to let it be lowered to UBFX.
-  if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
-      isa<ConstantSDNode>(N->getOperand(1))) {
-    uint64_t TruncMask = N->getConstantOperandVal(1);
-    if (isMask_64(TruncMask) &&
-        N->getOperand(0).getOpcode() == ISD::SRL &&
-        isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
-      return false;
-  }
-  return true;
-}
-
 bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                               Type *Ty) const {
   assert(Ty->isIntegerTy());
Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2590,14 +2590,7 @@
   SDValue N0 = Node->getOperand(0);
   SDValue N1 = Node->getOperand(1);
 
-  // If we have TBM we can use an immediate for the control. If we have BMI
-  // we should only do this if the BEXTR instruction is implemented well.
-  // Otherwise moving the control into a register makes this more costly.
-  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
-  // hoisting the move immediate would make it worthwhile with a less optimal
-  // BEXTR?
-  if (!Subtarget->hasTBM() &&
-      !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
+  if (!TLI->haveBitFieldExtractPattern(NVT))
     return false;
 
   // Must have a shift right.
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -1034,6 +1034,8 @@
              (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
     }
 
+    bool haveBitFieldExtractPattern(EVT VT) const override;
+
     /// Returns true if it is beneficial to convert a load of a constant
     /// to just the constant itself.
     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -4701,6 +4701,19 @@
   return true;
 }
 
+bool X86TargetLowering::haveBitFieldExtractPattern(EVT VT) const {
+  // If we have TBM we can use an immediate for the control. If we have BMI
+  // we should only do this if the BEXTR instruction is implemented well.
+  // Otherwise moving the control into a register makes this more costly.
+  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
+  // hoisting the move immediate would make it worthwhile with a less optimal
+  // BEXTR?
+  if (!Subtarget.hasTBM() && !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
+    return false;
+  // BEXTR is valid for i32 and i64 types.
+  return VT == MVT::i32 || VT == MVT::i64;
+}
+
 /// Returns true if it is beneficial to convert a load of a constant
 /// to just the constant itself.
 bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Index: test/CodeGen/X86/extract-bits.ll
===================================================================
--- test/CodeGen/X86/extract-bits.ll
+++ test/CodeGen/X86/extract-bits.ll
@@ -5615,23 +5615,69 @@
 
 ; https://bugs.llvm.org/show_bug.cgi?id=38938
 define void @pr38938(i32* %a0, i64* %a1) {
-; X86-LABEL: pr38938:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: shrl $19, %ecx
-; X86-NEXT: andl $4092, %ecx # imm = 0xFFC
-; X86-NEXT: incl (%eax,%ecx)
-; X86-NEXT: retl
+; X86-NOBMI-LABEL: pr38938:
+; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movl (%ecx), %ecx
+; X86-NOBMI-NEXT: shrl $19, %ecx
+; X86-NOBMI-NEXT: andl $4092, %ecx # imm = 0xFFC
+; X86-NOBMI-NEXT: incl (%eax,%ecx)
+; X86-NOBMI-NEXT: retl
 ;
-; X64-LABEL: pr38938:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rsi), %rax
-; X64-NEXT: shrq $19, %rax
-; X64-NEXT: andl $4092, %eax # imm = 0xFFC
-; X64-NEXT: incl (%rdi,%rax)
-; X64-NEXT: retq
+; X86-BMI1NOTBM-LABEL: pr38938:
+; X86-BMI1NOTBM: # %bb.0:
+; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: movl $2581, %edx # imm = 0xA15
+; X86-BMI1NOTBM-NEXT: bextrl %edx, (%ecx), %ecx
+; X86-BMI1NOTBM-NEXT: incl (%eax,%ecx,4)
+; X86-BMI1NOTBM-NEXT: retl
+;
+; X86-BMI1TBM-LABEL: pr38938:
+; X86-BMI1TBM: # %bb.0:
+; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1TBM-NEXT: bextrl $2581, (%ecx), %ecx # imm = 0xA15
+; X86-BMI1TBM-NEXT: incl (%eax,%ecx,4)
+; X86-BMI1TBM-NEXT: retl
+;
+; X86-BMI1NOTBMBMI2-LABEL: pr38938:
+; X86-BMI1NOTBMBMI2: # %bb.0:
+; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBMBMI2-NEXT: movl $2581, %edx # imm = 0xA15
+; X86-BMI1NOTBMBMI2-NEXT: bextrl %edx, (%ecx), %ecx
+; X86-BMI1NOTBMBMI2-NEXT: incl (%eax,%ecx,4)
+; X86-BMI1NOTBMBMI2-NEXT: retl
+;
+; X64-NOBMI-LABEL: pr38938:
+; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movq (%rsi), %rax
+; X64-NOBMI-NEXT: shrq $19, %rax
+; X64-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC
+; X64-NOBMI-NEXT: incl (%rdi,%rax)
+; X64-NOBMI-NEXT: retq
+;
+; X64-BMI1NOTBM-LABEL: pr38938:
+; X64-BMI1NOTBM: # %bb.0:
+; X64-BMI1NOTBM-NEXT: movl $2581, %eax # imm = 0xA15
+; X64-BMI1NOTBM-NEXT: bextrl %eax, (%rsi), %eax
+; X64-BMI1NOTBM-NEXT: incl (%rdi,%rax,4)
+; X64-BMI1NOTBM-NEXT: retq
+;
+; X64-BMI1TBM-LABEL: pr38938:
+; X64-BMI1TBM: # %bb.0:
+; X64-BMI1TBM-NEXT: bextrl $2581, (%rsi), %eax # imm = 0xA15
+; X64-BMI1TBM-NEXT: incl (%rdi,%rax,4)
+; X64-BMI1TBM-NEXT: retq
+;
+; X64-BMI1NOTBMBMI2-LABEL: pr38938:
+; X64-BMI1NOTBMBMI2: # %bb.0:
+; X64-BMI1NOTBMBMI2-NEXT: movl $2581, %eax # imm = 0xA15
+; X64-BMI1NOTBMBMI2-NEXT: bextrl %eax, (%rsi), %eax
+; X64-BMI1NOTBMBMI2-NEXT: incl (%rdi,%rax,4)
+; X64-BMI1NOTBMBMI2-NEXT: retq
   %tmp = load i64, i64* %a1, align 8
   %tmp1 = lshr i64 %tmp, 21
   %tmp2 = and i64 %tmp1, 1023
@@ -5718,19 +5764,59 @@
 
 ; Should be still fine, but the result is shifted left afterwards
 define i32 @c2_i32(i32 %arg) {
-; X86-LABEL: c2_i32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shrl $17, %eax
-; X86-NEXT: andl $4092, %eax # imm = 0xFFC
-; X86-NEXT: retl
+; X86-NOBMI-LABEL: c2_i32:
+; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: shrl $17, %eax
+; X86-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC
+; X86-NOBMI-NEXT: retl
 ;
-; X64-LABEL: c2_i32:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrl $17, %eax
-; X64-NEXT: andl $4092, %eax # imm = 0xFFC
-; X64-NEXT: retq
+; X86-BMI1NOTBM-LABEL: c2_i32:
+; X86-BMI1NOTBM: # %bb.0:
+; X86-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13
+; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT: shll $2, %eax
+; X86-BMI1NOTBM-NEXT: retl
+;
+; X86-BMI1TBM-LABEL: c2_i32:
+; X86-BMI1TBM: # %bb.0:
+; X86-BMI1TBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13
+; X86-BMI1TBM-NEXT: shll $2, %eax
+; X86-BMI1TBM-NEXT: retl
+;
+; X86-BMI1NOTBMBMI2-LABEL: c2_i32:
+; X86-BMI1NOTBMBMI2: # %bb.0:
+; X86-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13
+; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBMBMI2-NEXT: shll $2, %eax
+; X86-BMI1NOTBMBMI2-NEXT: retl
+;
+; X64-NOBMI-LABEL: c2_i32:
+; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movl %edi, %eax
+; X64-NOBMI-NEXT: shrl $17, %eax
+; X64-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC
+; X64-NOBMI-NEXT: retq
+;
+; X64-BMI1NOTBM-LABEL: c2_i32:
+; X64-BMI1NOTBM: # %bb.0:
+; X64-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13
+; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax
+; X64-BMI1NOTBM-NEXT: shll $2, %eax
+; X64-BMI1NOTBM-NEXT: retq
+;
+; X64-BMI1TBM-LABEL: c2_i32:
+; X64-BMI1TBM: # %bb.0:
+; X64-BMI1TBM-NEXT: bextrl $2579, %edi, %eax # imm = 0xA13
+; X64-BMI1TBM-NEXT: shll $2, %eax
+; X64-BMI1TBM-NEXT: retq
+;
+; X64-BMI1NOTBMBMI2-LABEL: c2_i32:
+; X64-BMI1NOTBMBMI2: # %bb.0:
+; X64-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13
+; X64-BMI1NOTBMBMI2-NEXT: bextrl %eax, %edi, %eax
+; X64-BMI1NOTBMBMI2-NEXT: shll $2, %eax
+; X64-BMI1NOTBMBMI2-NEXT: retq
   %tmp0 = lshr i32 %arg, 19
   %tmp1 = and i32 %tmp0, 1023
   %tmp2 = shl i32 %tmp1, 2
@@ -5840,20 +5926,63 @@
 
 ; Should be still fine, but the result is shifted left afterwards
 define i64 @c2_i64(i64 %arg) {
-; X86-LABEL: c2_i64:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shrl $17, %eax
-; X86-NEXT: andl $4092, %eax # imm = 0xFFC
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: retl
+; X86-NOBMI-LABEL: c2_i64:
+; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: shrl $17, %eax
+; X86-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC
+; X86-NOBMI-NEXT: xorl %edx, %edx
+; X86-NOBMI-NEXT: retl
 ;
-; X64-LABEL: c2_i64:
-; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shrq $49, %rax
-; X64-NEXT: andl $4092, %eax # imm = 0xFFC
-; X64-NEXT: retq
+; X86-BMI1NOTBM-LABEL: c2_i64:
+; X86-BMI1NOTBM: # %bb.0:
+; X86-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13
+; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT: shll $2, %eax
+; X86-BMI1NOTBM-NEXT: xorl %edx, %edx
+; X86-BMI1NOTBM-NEXT: retl
+;
+; X86-BMI1TBM-LABEL: c2_i64:
+; X86-BMI1TBM: # %bb.0:
+; X86-BMI1TBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13
+; X86-BMI1TBM-NEXT: shll $2, %eax
+; X86-BMI1TBM-NEXT: xorl %edx, %edx
+; X86-BMI1TBM-NEXT: retl
+;
+; X86-BMI1NOTBMBMI2-LABEL: c2_i64:
+; X86-BMI1NOTBMBMI2: # %bb.0:
+; X86-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13
+; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBMBMI2-NEXT: shll $2, %eax
+; X86-BMI1NOTBMBMI2-NEXT: xorl %edx, %edx
+; X86-BMI1NOTBMBMI2-NEXT: retl
+;
+; X64-NOBMI-LABEL: c2_i64:
+; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movq %rdi, %rax
+; X64-NOBMI-NEXT: shrq $49, %rax
+; X64-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC
+; X64-NOBMI-NEXT: retq
+;
+; X64-BMI1NOTBM-LABEL: c2_i64:
+; X64-BMI1NOTBM: # %bb.0:
+; X64-BMI1NOTBM-NEXT: movl $2611, %eax # imm = 0xA33
+; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax
+; X64-BMI1NOTBM-NEXT: shlq $2, %rax
+; X64-BMI1NOTBM-NEXT: retq
+;
+; X64-BMI1TBM-LABEL: c2_i64:
+; X64-BMI1TBM: # %bb.0:
+; X64-BMI1TBM-NEXT: bextrq $2611, %rdi, %rax # imm = 0xA33
+; X64-BMI1TBM-NEXT: shlq $2, %rax
+; X64-BMI1TBM-NEXT: retq
+;
+; X64-BMI1NOTBMBMI2-LABEL: c2_i64:
+; X64-BMI1NOTBMBMI2: # %bb.0:
+; X64-BMI1NOTBMBMI2-NEXT: movl $2611, %eax # imm = 0xA33
+; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, %rdi, %rax
+; X64-BMI1NOTBMBMI2-NEXT: shlq $2, %rax
+; X64-BMI1NOTBMBMI2-NEXT: retq
   %tmp0 = lshr i64 %arg, 51
   %tmp1 = and i64 %tmp0, 1023
   %tmp2 = shl i64 %tmp1, 2
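
For reference, a minimal IR sketch (illustration only, not part of the patch; the function name is made up) of the shape that isBitFieldExtractPattern recognizes and that isDesirableToCommuteWithShift now refuses to commute. It mirrors the c2_i32 test above: a constant right shift feeding an all-ones low mask, followed by an outer shift that DAGCombine would previously have folded through the and/srl chain:

define i32 @extract_then_shift(i32 %arg) {
  %shifted = lshr i32 %arg, 19    ; field starts at bit 19
  %field = and i32 %shifted, 1023 ; all-ones low mask => 10-bit field
  %scaled = shl i32 %field, 2     ; outer shift, no longer commuted inward
  ret i32 %scaled
}

With BMI1/TBM on x86 (or UBFX on AArch64) the lshr+and pair is then selected as a single bit-field extract, as the new CHECK lines show; without those features the old shift-then-mask form is kept.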