Index: llvm/trunk/lib/Target/X86/X86.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86.td
+++ llvm/trunk/lib/Target/X86/X86.td
@@ -427,6 +427,11 @@
         "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
         "normal vector instructions with shuffles", [FeatureSSE3]>;
 
+def FeatureFastScalarShiftMasks
+    : SubtargetFeature<
+        "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
+        "Prefer a left/right scalar logical shift pair over a shift+and pair">;
+
 def FeatureFastVectorShiftMasks
     : SubtargetFeature<
         "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
@@ -784,6 +789,7 @@
                                         FeatureSlowSHLD,
                                         FeatureLAHFSAHF,
                                         FeatureFast15ByteNOP,
+                                        FeatureFastScalarShiftMasks,
                                         FeatureFastVectorShiftMasks];
   list<SubtargetFeature> BtVer1Features = BtVer1InheritableFeatures;
 
@@ -825,6 +831,7 @@
                                         FeatureSlowSHLD,
                                         FeatureLAHFSAHF,
                                         FeatureFast11ByteNOP,
+                                        FeatureFastScalarShiftMasks,
                                         FeatureBranchFusion];
   list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures;
 
@@ -876,6 +883,7 @@
                                         FeatureFastBEXTR,
                                         FeatureFast15ByteNOP,
                                         FeatureBranchFusion,
+                                        FeatureFastScalarShiftMasks,
                                         FeatureMMX,
                                         FeatureMOVBE,
                                         FeatureMWAITX,
@@ -1092,20 +1100,22 @@
 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
   def : Proc;
+                 Feature64Bit, FeatureSlowSHLD, FeatureCMOV,
+                 FeatureFastScalarShiftMasks]>;
 }
 
 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
   def : Proc;
+                 FeatureSlowSHLD, FeatureCMOV, Feature64Bit,
+                 FeatureFastScalarShiftMasks]>;
 }
 
 foreach P = ["amdfam10", "barcelona"] in {
   def : Proc;
+                 Feature64Bit, FeatureFastScalarShiftMasks]>;
 }
 
 // Bobcat
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -5021,11 +5021,12 @@
           (N->getOpcode() == ISD::SRL &&
            N->getOperand(0).getOpcode() == ISD::SHL)) &&
          "Expected shift-shift mask");
-
-  if (Subtarget.hasFastVectorShiftMasks() && N->getValueType(0).isVector()) {
+  EVT VT = N->getValueType(0);
+  if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
+      (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
     // Only fold if the shift values are equal - so it folds to AND.
-    // TODO - we should fold if either is non-uniform but we don't do the
-    // fold for non-splats yet.
+    // TODO - we should fold if either is a non-uniform vector but we don't do
+    // the fold for non-splats yet.
     return N->getOperand(1) == N->getOperand(0).getOperand(1);
   }
   return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
Index: llvm/trunk/lib/Target/X86/X86Subtarget.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h
@@ -396,6 +396,9 @@
   /// Try harder to combine to horizontal vector ops if they are fast.
   bool HasFastHorizontalOps = false;
 
+  /// Prefer a left/right scalar logical shifts pair over a shift+and pair.
+  bool HasFastScalarShiftMasks = false;
+
   /// Prefer a left/right vector logical shifts pair over a shift+and pair.
   bool HasFastVectorShiftMasks = false;
 
@@ -650,6 +653,7 @@
   bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
   bool hasFastBEXTR() const { return HasFastBEXTR; }
   bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
+  bool hasFastScalarShiftMasks() const { return HasFastScalarShiftMasks; }
   bool hasFastVectorShiftMasks() const { return HasFastVectorShiftMasks; }
   bool hasMacroFusion() const { return HasMacroFusion; }
   bool hasBranchFusion() const { return HasBranchFusion; }
Index: llvm/trunk/test/CodeGen/X86/shift-mask.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/shift-mask.ll
+++ llvm/trunk/test/CodeGen/X86/shift-mask.ll
@@ -43,13 +43,21 @@
 ; X86-NEXT:    andb $-32, %al
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_i8_shl_lshr_1:
-; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    leal (,%rdi,4), %eax
-; X64-NEXT:    andb $-32, %al
-; X64-NEXT:    # kill: def $al killed $al killed $eax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i8_shl_lshr_1:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-MASK-NEXT:    leal (,%rdi,4), %eax
+; X64-MASK-NEXT:    andb $-32, %al
+; X64-MASK-NEXT:    # kill: def $al killed $al killed $eax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i8_shl_lshr_1:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    movl %edi, %eax
+; X64-SHIFT-NEXT:    shrb $3, %al
+; X64-SHIFT-NEXT:    shlb $5, %al
+; X64-SHIFT-NEXT:    # kill: def $al killed $al killed $eax
+; X64-SHIFT-NEXT:    retq
   %1 = lshr i8 %a0, 3
   %2 = shl i8 %1, 5
   ret i8 %2
@@ -63,13 +71,21 @@
 ; X86-NEXT:    andb $56, %al
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_i8_shl_lshr_2:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shrb $2, %al
-; X64-NEXT:    andb $56, %al
-; X64-NEXT:    # kill: def $al killed $al killed $eax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i8_shl_lshr_2:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    movl %edi, %eax
+; X64-MASK-NEXT:    shrb $2, %al
+; X64-MASK-NEXT:    andb $56, %al
+; X64-MASK-NEXT:    # kill: def $al killed $al killed $eax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i8_shl_lshr_2:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-SHIFT-NEXT:    shrb $5, %dil
+; X64-SHIFT-NEXT:    leal (,%rdi,8), %eax
+; X64-SHIFT-NEXT:    # kill: def $al killed $al killed $eax
+; X64-SHIFT-NEXT:    retq
   %1 = lshr i8 %a0, 5
   %2 = shl i8 %1, 3
   ret i8 %2
@@ -103,13 +119,21 @@
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_i16_shl_lshr_1:
-; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    leal (,%rdi,4), %eax
-; X64-NEXT:    andl $65504, %eax # imm = 0xFFE0
-; X64-NEXT:    # kill: def $ax killed $ax killed $eax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i16_shl_lshr_1:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-MASK-NEXT:    leal (,%rdi,4), %eax
+; X64-MASK-NEXT:    andl $65504, %eax # imm = 0xFFE0
+; X64-MASK-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i16_shl_lshr_1:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    movzwl %di, %eax
+; X64-SHIFT-NEXT:    shrl $3, %eax
+; X64-SHIFT-NEXT:    shll $5, %eax
+; X64-SHIFT-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-SHIFT-NEXT:    retq
   %1 = lshr i16 %a0, 3
   %2 = shl i16 %1, 5
   ret i16 %2
@@ -124,13 +148,21 @@
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_i16_shl_lshr_2:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shrl $2, %eax
-; X64-NEXT:    andl $16376, %eax # imm = 0x3FF8
-; X64-NEXT:    # kill: def $ax killed $ax killed $eax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i16_shl_lshr_2:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    movl %edi, %eax
+; X64-MASK-NEXT:    shrl $2, %eax
+; X64-MASK-NEXT:    andl $16376, %eax # imm = 0x3FF8
+; X64-MASK-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i16_shl_lshr_2:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    movzwl %di, %eax
+; X64-SHIFT-NEXT:    shrl $5, %eax
+; X64-SHIFT-NEXT:    shll $3, %eax
+; X64-SHIFT-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-SHIFT-NEXT:    retq
   %1 = lshr i16 %a0, 5
   %2 = shl i16 %1, 3
   ret i16 %2
@@ -161,12 +193,19 @@
 ; X86-NEXT:    andl $-32, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_i32_shl_lshr_1:
-; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    leal (,%rdi,4), %eax
-; X64-NEXT:    andl $-32, %eax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i32_shl_lshr_1:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-MASK-NEXT:    leal (,%rdi,4), %eax
+; X64-MASK-NEXT:    andl $-32, %eax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i32_shl_lshr_1:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    movl %edi, %eax
+; X64-SHIFT-NEXT:    shrl $3, %eax
+; X64-SHIFT-NEXT:    shll $5, %eax
+; X64-SHIFT-NEXT:    retq
   %1 = lshr i32 %a0, 3
   %2 = shl i32 %1, 5
   ret i32 %2
@@ -180,12 +219,19 @@
 ; X86-NEXT:    andl $-8, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_i32_shl_lshr_2:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shrl $2, %eax
-; X64-NEXT:    andl $-8, %eax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i32_shl_lshr_2:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    movl %edi, %eax
+; X64-MASK-NEXT:    shrl $2, %eax
+; X64-MASK-NEXT:    andl $-8, %eax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i32_shl_lshr_2:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-SHIFT-NEXT:    shrl $5, %edi
+; X64-SHIFT-NEXT:    leal (,%rdi,8), %eax
+; X64-SHIFT-NEXT:    retq
   %1 = lshr i32 %a0, 5
   %2 = shl i32 %1, 3
   ret i32 %2
@@ -219,11 +265,18 @@
 ; X86-NEXT:    shldl $2, %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_i64_shl_lshr_1:
-; X64:       # %bb.0:
-; X64-NEXT:    leaq (,%rdi,4), %rax
-; X64-NEXT:    andq $-32, %rax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i64_shl_lshr_1:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    leaq (,%rdi,4), %rax
+; X64-MASK-NEXT:    andq $-32, %rax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i64_shl_lshr_1:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    movq %rdi, %rax
+; X64-SHIFT-NEXT:    shrq $3, %rax
+; X64-SHIFT-NEXT:    shlq $5, %rax
+; X64-SHIFT-NEXT:    retq
   %1 = lshr i64 %a0, 3
   %2 = shl i64 %1, 5
   ret i64 %2
@@ -239,12 +292,18 @@
 ; X86-NEXT:    shrl $2, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_i64_shl_lshr_2:
-; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shrq $2, %rax
-; X64-NEXT:    andq $-8, %rax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i64_shl_lshr_2:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    movq %rdi, %rax
+; X64-MASK-NEXT:    shrq $2, %rax
+; X64-MASK-NEXT:    andq $-8, %rax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i64_shl_lshr_2:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    shrq $5, %rdi
+; X64-SHIFT-NEXT:    leaq (,%rdi,8), %rax
+; X64-SHIFT-NEXT:    retq
   %1 = lshr i64 %a0, 5
   %2 = shl i64 %1, 3
   ret i64 %2
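
Note: the difference covered by the new X64-MASK/X64-SHIFT prefixes can be reproduced on any of the functions above by toggling the feature directly. The sketch below is illustrative only and not part of the patch; the llc invocations and the standalone file name are assumptions, forcing the feature on or off via -mattr rather than implying it through -mcpu.

; shift-mask-demo.ll - minimal sketch, not part of the patch.
; Assumed invocations (not the test's actual RUN lines):
;   llc -mtriple=x86_64-- -mattr=-fast-scalar-shift-masks shift-mask-demo.ll -o -
;     expected: the X64-MASK form above (shrl $2, %eax ; andl $-8, %eax)
;   llc -mtriple=x86_64-- -mattr=+fast-scalar-shift-masks shift-mask-demo.ll -o -
;     expected: the X64-SHIFT form above (shrl $5, %edi ; leal (,%rdi,8), %eax)
define i32 @test_i32_shl_lshr_2(i32 %a0) {
  %1 = lshr i32 %a0, 5
  %2 = shl i32 %1, 3
  ret i32 %2
}

When the two shift amounts are equal, the shouldFoldConstantShiftPairToMask override still returns true even with the feature enabled, so those cases keep folding to a single AND; only the unequal-amount cases exercised above change shape.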