Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -35465,6 +35465,18 @@ return N00; } + // Fold (VSRAI (VSRAI X, C1), C2) --> (VSRAI X, (C1 + C2)) with (C1 + C2) + // clamped to (NumBitsPerElt - 1). + if (Opcode == X86ISD::VSRAI && N0.getOpcode() == X86ISD::VSRAI && + N0.hasOneUse()) { + unsigned ShiftVal2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + unsigned NewShiftVal = ShiftVal + ShiftVal2; + if (NewShiftVal >= NumBitsPerElt) + NewShiftVal = NumBitsPerElt - 1; + return DAG.getNode(X86ISD::VSRAI, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(NewShiftVal, SDLoc(N), MVT::i8)); + } + // We can decode 'whole byte' logical bit shifts as shuffles. if (LogicalShift && (ShiftVal % 8) == 0) { SDValue Op(N, 0); Index: test/Analysis/CostModel/X86/testshiftashr.ll =================================================================== --- test/Analysis/CostModel/X86/testshiftashr.ll +++ test/Analysis/CostModel/X86/testshiftashr.ll @@ -261,7 +261,7 @@ ; SSE2-LABEL: shift4i16const ; SSE2: cost of 1 {{.*}} ashr ; SSE2-CODEGEN-LABEL: shift4i16const - ; SSE2-CODEGEN: psrad $3 + ; SSE2-CODEGEN: psrad $19 %0 = ashr %shifttypec4i16 %a , <i16 3, i16 3, i16 3, i16 3> ret %shifttypec4i16 %0 @@ -476,7 +476,7 @@ ; SSE2-LABEL: shift4i8c ; SSE2: cost of 1 {{.*}} ashr ; SSE2-CODEGEN-LABEL: shift4i8c - ; SSE2-CODEGEN: psrad $3 + ; SSE2-CODEGEN: psrad $27 %0 = ashr %shifttypec4i8 %a , <i8 3, i8 3, i8 3, i8 3> ret %shifttypec4i8 %0 @@ -488,7 +488,7 @@ ; SSE2-LABEL: shift8i8c ; SSE2: cost of 1 {{.*}} ashr ; SSE2-CODEGEN-LABEL: shift8i8c - ; SSE2-CODEGEN: psraw $3 + ; SSE2-CODEGEN: psraw $11 %0 = ashr %shifttypec8i8 %a , <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> Index: test/CodeGen/X86/vector-shift-ashr-sub128.ll =================================================================== --- test/CodeGen/X86/vector-shift-ashr-sub128.ll +++ test/CodeGen/X86/vector-shift-ashr-sub128.ll @@ -2574,8 +2574,7 @@ ; AVX512-LABEL: splatconstant_shift_v2i32: ; AVX512: # 
%bb.0: ; AVX512-NEXT: vpsllq $32, %xmm0, %xmm0 -; AVX512-NEXT: vpsraq $32, %zmm0, %zmm0 -; AVX512-NEXT: vpsraq $5, %zmm0, %zmm0 +; AVX512-NEXT: vpsraq $37, %zmm0, %zmm0 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -2583,8 +2582,7 @@ ; AVX512VL-LABEL: splatconstant_shift_v2i32: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllq $32, %xmm0, %xmm0 -; AVX512VL-NEXT: vpsraq $32, %xmm0, %xmm0 -; AVX512VL-NEXT: vpsraq $5, %xmm0, %xmm0 +; AVX512VL-NEXT: vpsraq $37, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; X32-SSE-LABEL: splatconstant_shift_v2i32: @@ -2609,43 +2607,37 @@ ; SSE-LABEL: splatconstant_shift_v4i16: ; SSE: # %bb.0: ; SSE-NEXT: pslld $16, %xmm0 -; SSE-NEXT: psrad $16, %xmm0 -; SSE-NEXT: psrad $3, %xmm0 +; SSE-NEXT: psrad $19, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: splatconstant_shift_v4i16: ; AVX: # %bb.0: ; AVX-NEXT: vpslld $16, %xmm0, %xmm0 -; AVX-NEXT: vpsrad $16, %xmm0, %xmm0 -; AVX-NEXT: vpsrad $3, %xmm0, %xmm0 +; AVX-NEXT: vpsrad $19, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; XOP-LABEL: splatconstant_shift_v4i16: ; XOP: # %bb.0: ; XOP-NEXT: vpslld $16, %xmm0, %xmm0 -; XOP-NEXT: vpsrad $16, %xmm0, %xmm0 -; XOP-NEXT: vpsrad $3, %xmm0, %xmm0 +; XOP-NEXT: vpsrad $19, %xmm0, %xmm0 ; XOP-NEXT: retq ; ; AVX512-LABEL: splatconstant_shift_v4i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vpslld $16, %xmm0, %xmm0 -; AVX512-NEXT: vpsrad $16, %xmm0, %xmm0 -; AVX512-NEXT: vpsrad $3, %xmm0, %xmm0 +; AVX512-NEXT: vpsrad $19, %xmm0, %xmm0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_shift_v4i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpslld $16, %xmm0, %xmm0 -; AVX512VL-NEXT: vpsrad $16, %xmm0, %xmm0 -; AVX512VL-NEXT: vpsrad $3, %xmm0, %xmm0 +; AVX512VL-NEXT: vpsrad $19, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; X32-SSE-LABEL: splatconstant_shift_v4i16: ; X32-SSE: # %bb.0: ; X32-SSE-NEXT: pslld $16, %xmm0 -; X32-SSE-NEXT: psrad $16, %xmm0 -; X32-SSE-NEXT: psrad $3, %xmm0 +; X32-SSE-NEXT: psrad $19, %xmm0 ; X32-SSE-NEXT: retl %shift 
= ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3> ret <4 x i16> %shift @@ -2717,8 +2709,7 @@ ; AVX512-LABEL: splatconstant_shift_v2i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vpsllq $48, %xmm0, %xmm0 -; AVX512-NEXT: vpsraq $48, %zmm0, %zmm0 -; AVX512-NEXT: vpsraq $3, %zmm0, %zmm0 +; AVX512-NEXT: vpsraq $51, %zmm0, %zmm0 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -2726,8 +2717,7 @@ ; AVX512VL-LABEL: splatconstant_shift_v2i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllq $48, %xmm0, %xmm0 -; AVX512VL-NEXT: vpsraq $48, %xmm0, %xmm0 -; AVX512VL-NEXT: vpsraq $3, %xmm0, %xmm0 +; AVX512VL-NEXT: vpsraq $51, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; X32-SSE-LABEL: splatconstant_shift_v2i16: @@ -2756,43 +2746,37 @@ ; SSE-LABEL: splatconstant_shift_v8i8: ; SSE: # %bb.0: ; SSE-NEXT: psllw $8, %xmm0 -; SSE-NEXT: psraw $8, %xmm0 -; SSE-NEXT: psraw $3, %xmm0 +; SSE-NEXT: psraw $11, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: splatconstant_shift_v8i8: ; AVX: # %bb.0: ; AVX-NEXT: vpsllw $8, %xmm0, %xmm0 -; AVX-NEXT: vpsraw $8, %xmm0, %xmm0 -; AVX-NEXT: vpsraw $3, %xmm0, %xmm0 +; AVX-NEXT: vpsraw $11, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; XOP-LABEL: splatconstant_shift_v8i8: ; XOP: # %bb.0: ; XOP-NEXT: vpsllw $8, %xmm0, %xmm0 -; XOP-NEXT: vpsraw $8, %xmm0, %xmm0 -; XOP-NEXT: vpsraw $3, %xmm0, %xmm0 +; XOP-NEXT: vpsraw $11, %xmm0, %xmm0 ; XOP-NEXT: retq ; ; AVX512-LABEL: splatconstant_shift_v8i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vpsllw $8, %xmm0, %xmm0 -; AVX512-NEXT: vpsraw $8, %xmm0, %xmm0 -; AVX512-NEXT: vpsraw $3, %xmm0, %xmm0 +; AVX512-NEXT: vpsraw $11, %xmm0, %xmm0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_shift_v8i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $8, %xmm0, %xmm0 -; AVX512VL-NEXT: vpsraw $8, %xmm0, %xmm0 -; AVX512VL-NEXT: vpsraw $3, %xmm0, %xmm0 +; AVX512VL-NEXT: vpsraw $11, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; X32-SSE-LABEL: splatconstant_shift_v8i8: ; X32-SSE: # %bb.0: ; X32-SSE-NEXT: psllw $8, %xmm0 -; X32-SSE-NEXT: psraw $8, 
%xmm0 -; X32-SSE-NEXT: psraw $3, %xmm0 +; X32-SSE-NEXT: psraw $11, %xmm0 ; X32-SSE-NEXT: retl %shift = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <8 x i8> %shift @@ -2802,43 +2786,37 @@ ; SSE-LABEL: splatconstant_shift_v4i8: ; SSE: # %bb.0: ; SSE-NEXT: pslld $24, %xmm0 -; SSE-NEXT: psrad $24, %xmm0 -; SSE-NEXT: psrad $3, %xmm0 +; SSE-NEXT: psrad $27, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: splatconstant_shift_v4i8: ; AVX: # %bb.0: ; AVX-NEXT: vpslld $24, %xmm0, %xmm0 -; AVX-NEXT: vpsrad $24, %xmm0, %xmm0 -; AVX-NEXT: vpsrad $3, %xmm0, %xmm0 +; AVX-NEXT: vpsrad $27, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; XOP-LABEL: splatconstant_shift_v4i8: ; XOP: # %bb.0: ; XOP-NEXT: vpslld $24, %xmm0, %xmm0 -; XOP-NEXT: vpsrad $24, %xmm0, %xmm0 -; XOP-NEXT: vpsrad $3, %xmm0, %xmm0 +; XOP-NEXT: vpsrad $27, %xmm0, %xmm0 ; XOP-NEXT: retq ; ; AVX512-LABEL: splatconstant_shift_v4i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vpslld $24, %xmm0, %xmm0 -; AVX512-NEXT: vpsrad $24, %xmm0, %xmm0 -; AVX512-NEXT: vpsrad $3, %xmm0, %xmm0 +; AVX512-NEXT: vpsrad $27, %xmm0, %xmm0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_shift_v4i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpslld $24, %xmm0, %xmm0 -; AVX512VL-NEXT: vpsrad $24, %xmm0, %xmm0 -; AVX512VL-NEXT: vpsrad $3, %xmm0, %xmm0 +; AVX512VL-NEXT: vpsrad $27, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; X32-SSE-LABEL: splatconstant_shift_v4i8: ; X32-SSE: # %bb.0: ; X32-SSE-NEXT: pslld $24, %xmm0 -; X32-SSE-NEXT: psrad $24, %xmm0 -; X32-SSE-NEXT: psrad $3, %xmm0 +; X32-SSE-NEXT: psrad $27, %xmm0 ; X32-SSE-NEXT: retl %shift = ashr <4 x i8> %a, <i8 3, i8 3, i8 3, i8 3> ret <4 x i8> %shift @@ -2910,8 +2888,7 @@ ; AVX512-LABEL: splatconstant_shift_v2i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vpsllq $56, %xmm0, %xmm0 -; AVX512-NEXT: vpsraq $56, %zmm0, %zmm0 -; AVX512-NEXT: vpsraq $3, %zmm0, %zmm0 +; AVX512-NEXT: vpsraq $59, %zmm0, %zmm0 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -2919,8 +2896,7 @@ ; AVX512VL-LABEL: splatconstant_shift_v2i8: ; 
AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllq $56, %xmm0, %xmm0 -; AVX512VL-NEXT: vpsraq $56, %xmm0, %xmm0 -; AVX512VL-NEXT: vpsraq $3, %xmm0, %xmm0 +; AVX512VL-NEXT: vpsraq $59, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; X32-SSE-LABEL: splatconstant_shift_v2i8: