Index: include/llvm/CodeGen/TargetLowering.h
===================================================================
--- include/llvm/CodeGen/TargetLowering.h
+++ include/llvm/CodeGen/TargetLowering.h
@@ -2985,17 +2985,39 @@
   ///
   virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
+  /// Returns true if the target has an instruction pattern for extracting N
+  /// contiguous bits starting at the M'th bit, e.g. BMI1's BEXTR on x86 or
+  /// UBFX on AArch64.
+  virtual bool haveBitFieldExtractPattern(EVT VT) const { return false; }
+
+  /// Returns true if N is (X >> C1) & C2, where C2 is a mask with all-ones
+  /// in the low bits.
+  virtual bool isBitFieldExtractPattern(const SDNode *N) const {
+    if (!haveBitFieldExtractPattern(N->getValueType(0)))
+      return false;
+    if (N->getOpcode() != ISD::AND)
+      return false;
+    const auto *AndCstMask = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (!AndCstMask)
+      return false;
+    SDValue N0 = N->getOperand(0);
+    if (N0.getOpcode() != ISD::SRL || !isa<ConstantSDNode>(N0.getOperand(1)))
+      return false;
+    // If the mask is all-ones in the low bits, this is a bit field extract.
+    return AndCstMask->getAPIntValue().isMask();
+  }
+
   /// Return true if it is profitable to move this shift by a constant amount
   /// through its operand, adjusting any immediate operands as necessary to
   /// preserve semantics. This transformation may not be desirable if it
   /// disrupts a particularly auspicious target-specific tree (e.g. bitfield
-  /// extraction in AArch64). By default, it returns true.
+  /// extraction in x86/AArch64). By default, it returns true unless the
+  /// shift's operand matches isBitFieldExtractPattern().
   ///
   /// @param N the shift node
   /// @param Level the current DAGCombine legalization level.
   virtual bool isDesirableToCommuteWithShift(const SDNode *N,
                                              CombineLevel Level) const {
-    return true;
+    // Commuting is desirable only if it does not disturb a 'bit field
+    // extract' pattern.
+    return !isBitFieldExtractPattern(N->getOperand(0).getNode());
   }
 
   // Return true if it is profitable to combine a BUILD_VECTOR with a stride-pattern
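Note (illustrative sketch, not part of the patch; the helper name is made up): the DAG shape guarded by isBitFieldExtractPattern() corresponds to source like the following.

// Sketch only: a 10-bit field extract starting at bit 19.
unsigned extract_field(unsigned x) {
  // SelectionDAG sees (and (srl x, 19), 1023). The mask 1023 is all-ones in
  // the low bits, so the subtree can be selected as a single UBFX on AArch64
  // or a single BEXTR on x86 with BMI1. If a later shift were commuted into
  // this subtree, the mask would stop being a low-bit mask and the
  // single-instruction selection would be lost.
  return (x >> 19) & 1023;
}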
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -363,9 +363,10 @@
 
   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
 
-  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
-  bool isDesirableToCommuteWithShift(const SDNode *N,
-                                     CombineLevel Level) const override;
+  bool haveBitFieldExtractPattern(EVT VT) const override {
+    // UBFX is valid for i32 and i64 types.
+    return VT == MVT::i32 || VT == MVT::i64;
+  }
 
   /// Returns true if it is beneficial to convert a load of a constant
   /// to just the constant itself.
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8539,24 +8539,6 @@
   return ScratchRegs;
 }
 
-bool
-AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
-                                                     CombineLevel Level) const {
-  N = N->getOperand(0).getNode();
-  EVT VT = N->getValueType(0);
-  // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
-  // it with shift to let it be lowered to UBFX.
-  if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
-      isa<ConstantSDNode>(N->getOperand(1))) {
-    uint64_t TruncMask = N->getConstantOperandVal(1);
-    if (isMask_64(TruncMask) &&
-        N->getOperand(0).getOpcode() == ISD::SRL &&
-        isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
-      return false;
-  }
-  return true;
-}
-
 bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                               Type *Ty) const {
   assert(Ty->isIntegerTy());
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -1034,6 +1034,8 @@
              (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
     }
 
+    bool haveBitFieldExtractPattern(EVT VT) const override;
+
     /// Returns true if it is beneficial to convert a load of a constant
     /// to just the constant itself.
     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -4701,6 +4701,11 @@
   return true;
 }
 
+bool X86TargetLowering::haveBitFieldExtractPattern(EVT VT) const {
+  // BEXTR is valid for i32 and i64 types.
+  return Subtarget.hasBMI() && (VT == MVT::i32 || VT == MVT::i64);
+}
+
 /// Returns true if it is beneficial to convert a load of a constant
 /// to just the constant itself.
 bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
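Note (illustrative sketch, not part of the patch; the helper name is made up): in the updated checks below, the BEXTR control operand packs the start bit in bits [7:0] and the field length in bits [15:8], so the new immediates can be sanity-checked like this.

#include <cassert>
#include <cstdint>

// Sketch only: compose a BEXTR control value from a start bit and a length.
constexpr uint32_t bextrControl(uint32_t Start, uint32_t Length) {
  return (Length << 8) | Start; // bits [7:0] = start, bits [15:8] = length
}

int main() {
  assert(bextrControl(21, 10) == 0xA15); // pr38938: (x >> 21) & 1023
  assert(bextrControl(19, 10) == 0xA13); // c2_i32:  (x >> 19) & 1023
  return 0;
}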
Index: test/CodeGen/X86/extract-bits.ll
===================================================================
--- test/CodeGen/X86/extract-bits.ll
+++ test/CodeGen/X86/extract-bits.ll
@@ -5615,23 +5615,69 @@
 ; https://bugs.llvm.org/show_bug.cgi?id=38938
 define void @pr38938(i32* %a0, i64* %a1) {
-; X86-LABEL: pr38938:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl (%ecx), %ecx
-; X86-NEXT:    shrl $19, %ecx
-; X86-NEXT:    andl $4092, %ecx # imm = 0xFFC
-; X86-NEXT:    incl (%eax,%ecx)
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: pr38938:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl (%ecx), %ecx
+; X86-NOBMI-NEXT:    shrl $19, %ecx
+; X86-NOBMI-NEXT:    andl $4092, %ecx # imm = 0xFFC
+; X86-NOBMI-NEXT:    incl (%eax,%ecx)
+; X86-NOBMI-NEXT:    retl
 ;
-; X64-LABEL: pr38938:
-; X64:       # %bb.0:
-; X64-NEXT:    movq (%rsi), %rax
-; X64-NEXT:    shrq $19, %rax
-; X64-NEXT:    andl $4092, %eax # imm = 0xFFC
-; X64-NEXT:    incl (%rdi,%rax)
-; X64-NEXT:    retq
+; X86-BMI1NOTBM-LABEL: pr38938:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT:    movl $2581, %edx # imm = 0xA15
+; X86-BMI1NOTBM-NEXT:    bextrl %edx, (%ecx), %ecx
+; X86-BMI1NOTBM-NEXT:    incl (%eax,%ecx,4)
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1TBM-LABEL: pr38938:
+; X86-BMI1TBM:       # %bb.0:
+; X86-BMI1TBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1TBM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1TBM-NEXT:    bextrl $2581, (%ecx), %ecx # imm = 0xA15
+; X86-BMI1TBM-NEXT:    incl (%eax,%ecx,4)
+; X86-BMI1TBM-NEXT:    retl
+;
+; X86-BMI1NOTBMBMI2-LABEL: pr38938:
+; X86-BMI1NOTBMBMI2:       # %bb.0:
+; X86-BMI1NOTBMBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBMBMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBMBMI2-NEXT:    movl $2581, %edx # imm = 0xA15
+; X86-BMI1NOTBMBMI2-NEXT:    bextrl %edx, (%ecx), %ecx
+; X86-BMI1NOTBMBMI2-NEXT:    incl (%eax,%ecx,4)
+; X86-BMI1NOTBMBMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: pr38938:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq (%rsi), %rax
+; X64-NOBMI-NEXT:    shrq $19, %rax
+; X64-NOBMI-NEXT:    andl $4092, %eax # imm = 0xFFC
+; X64-NOBMI-NEXT:    incl (%rdi,%rax)
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: pr38938:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    movl $2581, %eax # imm = 0xA15
+; X64-BMI1NOTBM-NEXT:    bextrl %eax, (%rsi), %eax
+; X64-BMI1NOTBM-NEXT:    incl (%rdi,%rax,4)
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1TBM-LABEL: pr38938:
+; X64-BMI1TBM:       # %bb.0:
+; X64-BMI1TBM-NEXT:    bextrl $2581, (%rsi), %eax # imm = 0xA15
+; X64-BMI1TBM-NEXT:    incl (%rdi,%rax,4)
+; X64-BMI1TBM-NEXT:    retq
+;
+; X64-BMI1NOTBMBMI2-LABEL: pr38938:
+; X64-BMI1NOTBMBMI2:       # %bb.0:
+; X64-BMI1NOTBMBMI2-NEXT:    movl $2581, %eax # imm = 0xA15
+; X64-BMI1NOTBMBMI2-NEXT:    bextrl %eax, (%rsi), %eax
+; X64-BMI1NOTBMBMI2-NEXT:    incl (%rdi,%rax,4)
+; X64-BMI1NOTBMBMI2-NEXT:    retq
   %tmp = load i64, i64* %a1, align 8
   %tmp1 = lshr i64 %tmp, 21
   %tmp2 = and i64 %tmp1, 1023
@@ -5718,19 +5764,59 @@
 ; Should be still fine, but the result is shifted left afterwards
 define i32 @c2_i32(i32 %arg) {
-; X86-LABEL: c2_i32:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shrl $17, %eax
-; X86-NEXT:    andl $4092, %eax # imm = 0xFFC
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: c2_i32:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    shrl $17, %eax
+; X86-NOBMI-NEXT:    andl $4092, %eax # imm = 0xFFC
+; X86-NOBMI-NEXT:    retl
 ;
-; X64-LABEL: c2_i32:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shrl $17, %eax
-; X64-NEXT:    andl $4092, %eax # imm = 0xFFC
-; X64-NEXT:    retq
+; X86-BMI1NOTBM-LABEL: c2_i32:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    movl $2579, %eax # imm = 0xA13
+; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    shll $2, %eax
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1TBM-LABEL: c2_i32:
+; X86-BMI1TBM:       # %bb.0:
+; X86-BMI1TBM-NEXT:    bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13
+; X86-BMI1TBM-NEXT:    shll $2, %eax
+; X86-BMI1TBM-NEXT:    retl
+;
+; X86-BMI1NOTBMBMI2-LABEL: c2_i32:
+; X86-BMI1NOTBMBMI2:       # %bb.0:
+; X86-BMI1NOTBMBMI2-NEXT:    movl $2579, %eax # imm = 0xA13
+; X86-BMI1NOTBMBMI2-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBMBMI2-NEXT:    shll $2, %eax
+; X86-BMI1NOTBMBMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: c2_i32:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    shrl $17, %eax
+; X64-NOBMI-NEXT:    andl $4092, %eax # imm = 0xFFC
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: c2_i32:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    movl $2579, %eax # imm = 0xA13
+; X64-BMI1NOTBM-NEXT:    bextrl %eax, %edi, %eax
+; X64-BMI1NOTBM-NEXT:    shll $2, %eax
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1TBM-LABEL: c2_i32:
+; X64-BMI1TBM:       # %bb.0:
+; X64-BMI1TBM-NEXT:    bextrl $2579, %edi, %eax # imm = 0xA13
+; X64-BMI1TBM-NEXT:    shll $2, %eax
+; X64-BMI1TBM-NEXT:    retq
+;
+; X64-BMI1NOTBMBMI2-LABEL: c2_i32:
+; X64-BMI1NOTBMBMI2:       # %bb.0:
+; X64-BMI1NOTBMBMI2-NEXT:    movl $2579, %eax # imm = 0xA13
+; X64-BMI1NOTBMBMI2-NEXT:    bextrl %eax, %edi, %eax
+; X64-BMI1NOTBMBMI2-NEXT:    shll $2, %eax
+; X64-BMI1NOTBMBMI2-NEXT:    retq
   %tmp0 = lshr i32 %arg, 19
   %tmp1 = and i32 %tmp0, 1023
   %tmp2 = shl i32 %tmp1, 2
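Note (illustrative sketch, not part of the patch; the function name is made up): the c2_i32 difference above in source form.

// Sketch only: mirrors the IR of @c2_i32.
unsigned c2_i32_equivalent(unsigned arg) {
  // Without the hook, DAGCombine commutes the shl through the and, yielding
  // (arg >> 17) & 4092; 4092 is not a low-bit mask, so no BEXTR/UBFX can be
  // formed. With the hook, the extract stays intact and the shift is emitted
  // afterwards: a bextrl with control 0xA13 (start 19, length 10) followed
  // by shll $2.
  return ((arg >> 19) & 1023) << 2;
}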