Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -17071,6 +17071,44 @@
   return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset);
 }
 
+static SDValue SRAFoldConstant(SDLoc DL, EVT VT, SDValue Cst1, SDValue Cst2,
+                               SelectionDAG &DAG) {
+
+  // For vectors, extract each constant element so we can constant
+  // fold them individually.
+  BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1.getNode());
+  BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2.getNode());
+  if (!BV1 || !BV2)
+    return SDValue();
+
+  assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
+
+  EVT SVT = VT.getScalarType();
+  SmallVector<SDValue, 16> Outputs;
+  for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
+    ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
+    ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I));
+    if (!V1 || !V2) // Not a constant, bail.
+      return SDValue();
+
+    if (V1->isOpaque() || V2->isOpaque())
+      return SDValue();
+
+    if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
+      return SDValue();
+
+    // Fold one vector element.
+    const APInt &C1 = V1->getAPIntValue();
+    const APInt &C2 = V2->getAPIntValue();
+    unsigned ShiftAmt = C2.getLimitedValue(C1.getBitWidth() - 1);
+    APInt Val = C1.ashr(ShiftAmt);
+    Outputs.push_back(DAG.getConstant(Val, DL, SVT));
+  }
+
+  // Build a big vector out of the scalar elements we generated.
+  return DAG.getBuildVector(VT, DL, Outputs);
+}
+
 static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc dl(Op);
@@ -17683,6 +17721,27 @@
     return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
                                 Mask, PassThru, Subtarget, DAG);
   }
+  case INTR_SRA_MASK:
+  case INTR_SRA: {
+    SDValue Src1 = Op.getOperand(1);
+    SDValue Src2 = Op.getOperand(2);
+    // From the spec: if the value specified in the respective data element of
+    // the count operand is greater than the element size, then the destination
+    // data element is filled with the corresponding sign bit of the source
+    // element. This differs from LLVM's SRA, where such a shift yields undef.
+    // Perform constant folding before the SRA node is created.
+    SDValue SRA = SRAFoldConstant(dl, VT, Src1, Src2, DAG);
+
+    if (!SRA.getNode())
+      SRA = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2);
+
+    if (IntrData->Type == INTR_SRA)
+      return SRA;
+
+    return getVectorMaskingNode(SRA, Op.getOperand(4), Op.getOperand(3),
+                                Subtarget, DAG);
+  }
+
   default:
     break;
   }
Index: lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- lib/Target/X86/X86IntrinsicsInfo.h
+++ lib/Target/X86/X86IntrinsicsInfo.h
@@ -35,7 +35,8 @@
   TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
   EXPAND_FROM_MEM, STOREANT, BLEND, INSERT_SUBVEC, TERLOG_OP_MASK,
   TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
-  FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
+  FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK,
+  INTR_SRA_MASK, INTR_SRA
 };
 
 struct IntrinsicData {
@@ -321,8 +322,8 @@
   X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
   X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
   X86_INTRINSIC_DATA(avx2_psrai_w, VSHIFT, X86ISD::VSRAI, 0),
-  X86_INTRINSIC_DATA(avx2_psrav_d, INTR_TYPE_2OP, ISD::SRA, 0),
-  X86_INTRINSIC_DATA(avx2_psrav_d_256, INTR_TYPE_2OP, ISD::SRA, 0),
+  X86_INTRINSIC_DATA(avx2_psrav_d, INTR_SRA, ISD::SRA, 0),
+  X86_INTRINSIC_DATA(avx2_psrav_d_256, INTR_SRA, ISD::SRA, 0),
   X86_INTRINSIC_DATA(avx2_psrl_d, INTR_TYPE_2OP, X86ISD::VSRL, 0),
   X86_INTRINSIC_DATA(avx2_psrl_q, INTR_TYPE_2OP, X86ISD::VSRL, 0),
   X86_INTRINSIC_DATA(avx2_psrl_w, INTR_TYPE_2OP, X86ISD::VSRL, 0),
@@ -1434,15 +1435,15 @@
   X86_INTRINSIC_DATA(avx512_mask_psra_wi_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrai_d, VSHIFT_MASK, X86ISD::VSRAI, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrai_q, VSHIFT_MASK, X86ISD::VSRAI, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psrav_d, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psrav_q, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psrav_q_128, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psrav_q_256, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psrav16_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psrav32_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psrav4_si, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psrav8_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psrav8_si, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrav_d, INTR_SRA_MASK, ISD::SRA, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrav_q, INTR_SRA_MASK, ISD::SRA, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrav_q_128, INTR_SRA_MASK, ISD::SRA, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrav_q_256, INTR_SRA_MASK, ISD::SRA, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrav16_hi, INTR_SRA_MASK, ISD::SRA, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrav32_hi, INTR_SRA_MASK, ISD::SRA, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrav4_si, INTR_SRA_MASK, ISD::SRA, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrav8_hi, INTR_SRA_MASK, ISD::SRA, 0),
+  X86_INTRINSIC_DATA(avx512_mask_psrav8_si, INTR_SRA_MASK, ISD::SRA, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrl_d, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrl_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrl_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
Index: test/CodeGen/X86/avx2-intrinsics-x86.ll
===================================================================
--- test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -1457,8 +1457,21 @@
   %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   ret <4 x i32> %res
 }
 
-declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
+define <4 x i32> @test_x86_avx2_psrav_d_fold(<4 x i32> %a0, <4 x i32> %a1) {
+; AVX2-LABEL: test_x86_avx2_psrav_d_fold:
+; AVX2:       ## BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [1,0,4294967295,0]
+; AVX2-NEXT:    retl
+;
+; AVX512VL-LABEL: test_x86_avx2_psrav_d_fold:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    vmovdqa32 {{.*#+}} xmm0 = [1,0,4294967295,0]
+; AVX512VL-NEXT:    retl
+  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> , <4 x i32> )
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
 
 define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
 ; AVX2-LABEL: test_x86_avx2_psrav_d_256:
@@ -1473,6 +1486,20 @@
   %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
   ret <8 x i32> %res
 }
+
+define <8 x i32> @test_x86_avx2_psrav_d_256_fold(<8 x i32> %a0, <8 x i32> %a1) {
+; AVX2-LABEL: test_x86_avx2_psrav_d_256_fold:
+; AVX2:       ## BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [1,0,4294967295,0,4294967295,0,4294967295,0]
+; AVX2-NEXT:    retl
+;
+; AVX512VL-LABEL: test_x86_avx2_psrav_d_256_fold:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    vmovdqa32 {{.*#+}} ymm0 = [1,0,4294967295,0,4294967295,0,4294967295,0]
+; AVX512VL-NEXT:    retl
+  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> , <8 x i32> )
+  ret <8 x i32> %res
+}
 declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
 
 define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1, <4 x i32> %idx, <2 x double> %mask) {
Index: test/CodeGen/X86/avx512bw-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512bw-intrinsics.ll
+++ test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -3107,6 +3107,22 @@
   ret <32 x i16> %res4
 }
 
+define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_fold(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psrav32_hi_fold:
+; AVX512BW:       ## BB#0:
+; AVX512BW-NEXT:    vmovdqu16 {{.*#+}} zmm0 = [1,0,65535,0,65535,0,65535,0,1,0,65535,0,65535,0,65535,0,1,0,65535,0,65535,0,65535,0,1,0,65535,0,65535,0,65535,0]
+; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi_fold:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    vmovdqu16 {{.*#+}} zmm0 = [1,0,65535,0,65535,0,65535,0,1,0,65535,0,65535,0,65535,0,1,0,65535,0,65535,0,65535,0,1,0,65535,0,65535,0,65535,0]
+; AVX512F-32-NEXT:    retl
+  %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> ,
+                                                          <32 x i16> ,
+                                                          <32 x i16> zeroinitializer, i32 -1)
+  ret <32 x i16> %res
+}
+
 declare <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
 
 define <32 x i16>@test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) {
Index: test/CodeGen/X86/avx512vl-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512vl-intrinsics.ll
+++ test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -8063,6 +8063,17 @@
   ret <8 x i32> %res4
 }
 
+define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_fold() {
+; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_si_fold:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovdqa32 {{.*#+}} ymm0 = [1,0,4294967295,0,4294967295,0,4294967295,0]
+; CHECK-NEXT:    ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A]
+; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI520_0-4, kind: reloc_riprel_4byte
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> , <8 x i32> , <8 x i32> zeroinitializer, i8 -1)
+  ret <8 x i32> %res
+}
+
 declare <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
 
 define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
@@ -8083,6 +8094,17 @@
   ret <2 x i64> %res4
 }
 
+define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_fold(i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_128_fold:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovdqa64 {{.*#+}} xmm0 = [1,18446744073709551615]
+; CHECK-NEXT:    ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x05,A,A,A,A]
+; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI522_0-4, kind: reloc_riprel_4byte
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> , <2 x i64> , <2 x i64> zeroinitializer, i8 -1)
+  ret <2 x i64> %res
+}
+
 declare <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
 
 define <4 x i64>@test_int_x86_avx512_mask_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
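
Note (not part of the patch): a minimal standalone sketch of the per-lane fold that SRAFoldConstant performs, assuming only llvm/ADT/APInt.h; the helper name foldSraLane is invented for the example. It shows why the count is clamped with getLimitedValue(BitWidth - 1): an oversized count such as 35 on a 32-bit lane saturates to 31, so the lane fills with the sign bit instead of the undef a plain ISD::SRA would allow.

// Sketch only; mirrors the clamped ashr used by the new SRAFoldConstant helper.
#include "llvm/ADT/APInt.h"
#include <cassert>

static llvm::APInt foldSraLane(const llvm::APInt &Src, const llvm::APInt &Cnt) {
  // Clamp the per-lane count to BitWidth - 1 so oversized counts produce an
  // all-sign-bits result, matching the VPSRAV* instruction semantics.
  unsigned ShiftAmt = Cnt.getLimitedValue(Src.getBitWidth() - 1);
  return Src.ashr(ShiftAmt);
}

int main() {
  llvm::APInt Src(32, -12, /*isSigned=*/true); // 0xFFFFFFF4
  llvm::APInt Cnt(32, 35);                     // larger than the 32-bit lane
  assert(foldSraLane(Src, Cnt) == llvm::APInt(32, -1, /*isSigned=*/true));
  return 0;
}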