diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -9690,8 +9690,8 @@
 
 The first operand of an '``extractelement``' instruction is a value of
 :ref:`vector <t_vector>` type. The second operand is an index indicating
-the position from which to extract the element. The index may be a
-variable of any integer type.
+the position from which to extract the element. The index may be a
+variable of any integer type and is treated as an unsigned integer.
 
 Semantics:
 """"""""""
@@ -9735,8 +9735,9 @@
 The first operand of an '``insertelement``' instruction is a value of
 :ref:`vector <t_vector>` type. The second operand is a scalar value whose
 type must equal the element type of the first operand. The third operand
-is an index indicating the position at which to insert the value. The
-index may be a variable of any integer type.
+is an index indicating the position at which to insert the value. The
+index may be a variable of any integer type and is treated as an
+unsigned integer.
 
 Semantics:
 """"""""""
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2808,7 +2808,7 @@
   Register Idx;
   if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
     if (CI->getBitWidth() != PreferredVecIdxWidth) {
-      APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth);
+      APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth);
       auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
       Idx = getOrCreateVReg(*NewIdxCI);
     }
@@ -2817,7 +2817,7 @@
     Idx = getOrCreateVReg(*U.getOperand(1));
   if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
     const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
-    Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx).getReg(0);
+    Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0);
   }
   MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
   return true;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3545,7 +3545,7 @@
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue InVec = getValue(I.getOperand(0));
   SDValue InVal = getValue(I.getOperand(1));
-  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
+  SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
                                      TLI.getVectorIdxTy(DAG.getDataLayout()));
   setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
                            TLI.getValueType(DAG.getDataLayout(), I.getType()),
@@ -3555,7 +3555,7 @@
 void SelectionDAGBuilder::visitExtractElement(const User &I) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue InVec = getValue(I.getOperand(0));
-  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
+  SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
                                      TLI.getVectorIdxTy(DAG.getDataLayout()));
   setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
                            TLI.getValueType(DAG.getDataLayout(), I.getType()),
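
Both builders now widen the index identically. A standalone sketch of why
the zext/sext choice is observable for a constant narrow index, assuming
only APInt and nothing else from the builder code above:

  #include "llvm/ADT/APInt.h"
  #include <cassert>

  int main() {
    llvm::APInt Idx(/*numBits=*/8, /*val=*/255); // an i8 index constant
    // New behavior: zero-extend to the 64-bit vector-index type.
    assert(Idx.zextOrTrunc(64) == 255);
    // Old behavior: sign-extension turned the index into -1.
    assert(Idx.sextOrTrunc(64).getSExtValue() == -1);
    return 0;
  }
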
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -1549,7 +1549,7 @@
 ; CHECK-LABEL: name: test_extractelement
 ; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0
 ; CHECK: [[IDX:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK: [[IDXEXT:%[0-9]+]]:_(s64) = G_SEXT [[IDX]]
+; CHECK: [[IDXEXT:%[0-9]+]]:_(s64) = G_ZEXT [[IDX]]
 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDXEXT]](s64)
 ; CHECK: $w0 = COPY [[RES]](s32)
   %res = extractelement <2 x i32> %vec, i32 %idx
@@ -1566,6 +1566,26 @@
   ret i32 %res
 }
 
+define i32 @test_extractelement_const_idx_zext_i1(<2 x i32> %vec) {
+; CHECK-LABEL: name: test_extractelement_const_idx_zext_i1
+; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+; CHECK: [[IDX:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s64)
+; CHECK: $w0 = COPY [[RES]](s32)
+  %res = extractelement <2 x i32> %vec, i1 true
+  ret i32 %res
+}
+
+define i32 @test_extractelement_const_idx_zext_i8(<2 x i32> %vec) {
+; CHECK-LABEL: name: test_extractelement_const_idx_zext_i8
+; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+; CHECK: [[IDX:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
+; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s64)
+; CHECK: $w0 = COPY [[RES]](s32)
+  %res = extractelement <2 x i32> %vec, i8 255
+  ret i32 %res
+}
+
 define i32 @test_singleelementvector(i32 %elt){
 ; CHECK-LABEL: name: test_singleelementvector
 ; CHECK: [[ELT:%[0-9]+]]:_(s32) = COPY $w0
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -9143,8 +9143,7 @@
 define i32 @load_single_extract_variable_index_v3i32_small_align(<3 x i32>* %A, i32 %idx) {
 ; CHECK-LABEL: load_single_extract_variable_index_v3i32_small_align:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    sxtw x9, w1
+; CHECK-NEXT:    mov w9, w1
 ; CHECK-NEXT:    mov w8, #2
 ; CHECK-NEXT:    cmp x9, #2
 ; CHECK-NEXT:    csel x8, x9, x8, lo
@@ -9158,8 +9157,7 @@
 define i32 @load_single_extract_variable_index_v3i32_default_align(<3 x i32>* %A, i32 %idx) {
 ; CHECK-LABEL: load_single_extract_variable_index_v3i32_default_align:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    sxtw x9, w1
+; CHECK-NEXT:    mov w9, w1
 ; CHECK-NEXT:    mov w8, #2
 ; CHECK-NEXT:    cmp x9, #2
 ; CHECK-NEXT:    csel x8, x9, x8, lo
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
--- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
@@ -292,8 +292,7 @@
 define i8 @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) #0 {
 ; CHECK-LABEL: test_lanex_16xi8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    whilels p0.b, xzr, x8
 ; CHECK-NEXT:    lastb w0, p0, z0.b
 ; CHECK-NEXT:    ret
@@ -304,8 +303,7 @@
 define i16 @test_lanex_8xi16(<vscale x 8 x i16> %a, i32 %x) #0 {
 ; CHECK-LABEL: test_lanex_8xi16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    whilels p0.h, xzr, x8
 ; CHECK-NEXT:    lastb w0, p0, z0.h
 ; CHECK-NEXT:    ret
@@ -316,8 +314,7 @@
 define i32 @test_lanex_4xi32(<vscale x 4 x i32> %a, i32 %x) #0 {
 ; CHECK-LABEL: test_lanex_4xi32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    whilels p0.s, xzr, x8
 ; CHECK-NEXT:    lastb w0, p0, z0.s
 ; CHECK-NEXT:    ret
@@ -328,8 +325,7 @@
 define i64 @test_lanex_2xi64(<vscale x 2 x i64> %a, i32 %x) #0 {
 ; CHECK-LABEL: test_lanex_2xi64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    whilels p0.d, xzr, x8
 ; CHECK-NEXT:    lastb x0, p0, z0.d
 ; CHECK-NEXT:    ret
@@ -340,8 +336,7 @@
 define half @test_lanex_8xf16(<vscale x 8 x half> %a, i32 %x) #0 {
 ; CHECK-LABEL: test_lanex_8xf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    whilels p0.h, xzr, x8
 ; CHECK-NEXT:    lastb h0, p0, z0.h
 ; CHECK-NEXT:    ret
@@ -352,8 +347,7 @@
 define half @test_lanex_4xf16(<vscale x 4 x half> %a, i32 %x) #0 {
 ; CHECK-LABEL: test_lanex_4xf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    whilels p0.s, xzr, x8
 ; CHECK-NEXT:    lastb h0, p0, z0.h
 ; CHECK-NEXT:    ret
@@ -364,8 +358,7 @@
 define half @test_lanex_2xf16(<vscale x 2 x half> %a, i32 %x) #0 {
 ; CHECK-LABEL: test_lanex_2xf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    whilels p0.d, xzr, x8
 ; CHECK-NEXT:    lastb h0, p0, z0.h
 ; CHECK-NEXT:    ret
@@ -376,8 +369,7 @@
 define float @test_lanex_4xf32(<vscale x 4 x float> %a, i32 %x) #0 {
 ; CHECK-LABEL: test_lanex_4xf32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    whilels p0.s, xzr, x8
 ; CHECK-NEXT:    lastb s0, p0, z0.s
 ; CHECK-NEXT:    ret
@@ -388,8 +380,7 @@
 define float @test_lanex_2xf32(<vscale x 2 x float> %a, i32 %x) #0 {
 ; CHECK-LABEL: test_lanex_2xf32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    whilels p0.d, xzr, x8
 ; CHECK-NEXT:    lastb s0, p0, z0.s
 ; CHECK-NEXT:    ret
@@ -400,8 +391,7 @@
 define double @test_lanex_2xf64(<vscale x 2 x double> %a, i32 %x) #0 {
 ; CHECK-LABEL: test_lanex_2xf64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    whilels p0.d, xzr, x8
 ; CHECK-NEXT:    lastb d0, p0, z0.d
 ; CHECK-NEXT:    ret
@@ -518,8 +508,7 @@
 define i1 @test_lanex_4xi1(<vscale x 4 x i1> %a, i32 %x) #0 {
 ; CHECK-LABEL: test_lanex_4xi1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
 ; CHECK-NEXT:    whilels p0.s, xzr, x8
 ; CHECK-NEXT:    lastb w8, p0, z0.s
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
--- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
@@ -128,8 +128,7 @@
 define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
 ; CHECK-LABEL: test_lanex_16xi8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov w9, #30
 ; CHECK-NEXT:    index z2.b, #0, #1
 ; CHECK-NEXT:    ptrue p0.b
@@ -389,8 +388,7 @@
 define <vscale x 8 x i1> @test_predicate_insert_8xi1_immediate (<vscale x 8 x i1> %val, i32 %idx) {
 ; CHECK-LABEL: test_predicate_insert_8xi1_immediate:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov w9, #1
 ; CHECK-NEXT:    index z1.h, #0, #1
 ; CHECK-NEXT:    ptrue p1.h
@@ -427,8 +425,7 @@
 define <vscale x 2 x i1> @test_predicate_insert_2xi1(<vscale x 2 x i1> %val, i1 %elt, i32 %idx) {
 ; CHECK-LABEL: test_predicate_insert_2xi1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    sxtw x8, w1
+; CHECK-NEXT:    mov w8, w1
 ; CHECK-NEXT:    index z1.d, #0, #1
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
@@ -446,8 +443,7 @@
 define <vscale x 4 x i1> @test_predicate_insert_4xi1(<vscale x 4 x i1> %val, i1 %elt, i32 %idx) {
 ; CHECK-LABEL: test_predicate_insert_4xi1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    sxtw x8, w1
+; CHECK-NEXT:    mov w8, w1
 ; CHECK-NEXT:    index z1.s, #0, #1
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    mov z0.s, w8
@@ -463,8 +459,7 @@
 define <vscale x 8 x i1> @test_predicate_insert_8xi1(<vscale x 8 x i1> %val, i1 %elt, i32 %idx) {
 ; CHECK-LABEL: test_predicate_insert_8xi1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    sxtw x8, w1
+; CHECK-NEXT:    mov w8, w1
 ; CHECK-NEXT:    index z1.h, #0, #1
 ; CHECK-NEXT:    ptrue p1.h
 ; CHECK-NEXT:    mov z0.h, w8
@@ -481,8 +476,7 @@
 define <vscale x 16 x i1> @test_predicate_insert_16xi1(<vscale x 16 x i1> %val, i1 %elt, i32 %idx) {
 ; CHECK-LABEL: test_predicate_insert_16xi1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    sxtw x8, w1
+; CHECK-NEXT:    mov w8, w1
 ; CHECK-NEXT:    index z1.b, #0, #1
 ; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    mov z0.b, w8
@@ -505,8 +499,7 @@
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
 ; CHECK-NEXT:    mov x8, #-1
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    sxtw x9, w1
+; CHECK-NEXT:    mov w9, w1
 ; CHECK-NEXT:    mov z0.b, p1/z, #1 // =0x1
 ; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    st1b { z0.b }, p1, [sp, #1, mul vl]
diff --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
--- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
@@ -6,8 +6,7 @@
 define i32 @promote_extract_2i32_idx(<vscale x 2 x i32> %a, i32 %idx) {
 ; CHECK-LABEL: promote_extract_2i32_idx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    whilels p0.d, xzr, x8
 ; CHECK-NEXT:    lastb x0, p0, z0.d
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -25,8 +24,7 @@
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
 ; CHECK-NEXT:    mov x8, #-1
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x9, w0
+; CHECK-NEXT:    mov w9, w0
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    st1b { z1.b }, p0, [sp, #1, mul vl]
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
@@ -51,8 +49,7 @@
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
 ; CHECK-NEXT:    mov x8, #-1
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x9, w0
+; CHECK-NEXT:    mov w9, w0
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #1, mul vl]
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
@@ -77,8 +74,7 @@
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
 ; CHECK-NEXT:    cnth x8
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x9, w0
+; CHECK-NEXT:    mov w9, w0
 ; CHECK-NEXT:    sub x8, x8, #1
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    cmp x9, x8
@@ -103,8 +99,7 @@
 ; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
 ; CHECK-NEXT:    cnth x8
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x9, w0
+; CHECK-NEXT:    mov w9, w0
 ; CHECK-NEXT:    sub x8, x8, #1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    cmp x9, x8
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -march=amdgcn -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+
+define i8 @f_i1_1() {
+  ; CHECK-LABEL: name: f_i1_1
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(<256 x s8>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x s8>), [[C]](s32)
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT:   SI_RETURN implicit $vgpr0
+  %E1 = extractelement <256 x i8> undef, i1 true
+  ret i8 %E1
+}
+
+define i8 @f_i8_255() {
+  ; CHECK-LABEL: name: f_i8_255
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(<256 x s8>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+  ; CHECK-NEXT:   [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x s8>), [[C]](s32)
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT:   SI_RETURN implicit $vgpr0
+  %E1 = extractelement <256 x i8> undef, i8 255
+  ret i8 %E1
+}
diff --git a/llvm/test/CodeGen/Mips/msa/basic_operations.ll b/llvm/test/CodeGen/Mips/msa/basic_operations.ll
--- a/llvm/test/CodeGen/Mips/msa/basic_operations.ll
+++ b/llvm/test/CodeGen/Mips/msa/basic_operations.ll
@@ -1315,7 +1315,7 @@
 ; N64-NEXT:    ld.b $w0, 0($2)
 ; N64-NEXT:    addv.b $w0, $w0, $w0
 ; N64-NEXT:    ld $1, %got_disp(i32)($1)
-; N64-NEXT:    lw $1, 0($1)
+; N64-NEXT:    lwu $1, 0($1)
 ; N64-NEXT:    splat.b $w0, $w0[$1]
 ; N64-NEXT:    mfc1 $1, $f0
 ; N64-NEXT:    sra $1, $1, 24
@@ -1371,7 +1371,7 @@
 ; N64-NEXT:    ld.h $w0, 0($2)
 ; N64-NEXT:    addv.h $w0, $w0, $w0
 ; N64-NEXT:    ld $1, %got_disp(i32)($1)
-; N64-NEXT:    lw $1, 0($1)
+; N64-NEXT:    lwu $1, 0($1)
 ; N64-NEXT:    splat.h $w0, $w0[$1]
 ; N64-NEXT:    mfc1 $1, $f0
 ; N64-NEXT:    sra $1, $1, 16
@@ -1423,7 +1423,7 @@
 ; N64-NEXT:    ld.w $w0, 0($2)
 ; N64-NEXT:    addv.w $w0, $w0, $w0
 ; N64-NEXT:    ld $1, %got_disp(i32)($1)
-; N64-NEXT:    lw $1, 0($1)
+; N64-NEXT:    lwu $1, 0($1)
 ; N64-NEXT:    splat.w $w0, $w0[$1]
 ; N64-NEXT:    jr $ra
 ; N64-NEXT:    mfc1 $2, $f0
@@ -1495,7 +1495,7 @@
 ; N64-NEXT:    ld.d $w0, 0($2)
 ; N64-NEXT:    addv.d $w0, $w0, $w0
 ; N64-NEXT:    ld $1, %got_disp(i32)($1)
-; N64-NEXT:    lw $1, 0($1)
+; N64-NEXT:    lwu $1, 0($1)
 ; N64-NEXT:    splat.d $w0, $w0[$1]
 ; N64-NEXT:    jr $ra
 ; N64-NEXT:    dmfc1 $2, $f0
@@ -1546,7 +1546,7 @@
 ; N64-NEXT:    ld.b $w0, 0($2)
 ; N64-NEXT:    addv.b $w0, $w0, $w0
 ; N64-NEXT:    ld $1, %got_disp(i32)($1)
-; N64-NEXT:    lw $1, 0($1)
+; N64-NEXT:    lwu $1, 0($1)
 ; N64-NEXT:    splat.b $w0, $w0[$1]
 ; N64-NEXT:    mfc1 $1, $f0
 ; N64-NEXT:    jr $ra
@@ -1599,7 +1599,7 @@
 ; N64-NEXT:    ld.h $w0, 0($2)
 ; N64-NEXT:    addv.h $w0, $w0, $w0
 ; N64-NEXT:    ld $1, %got_disp(i32)($1)
-; N64-NEXT:    lw $1, 0($1)
+; N64-NEXT:    lwu $1, 0($1)
 ; N64-NEXT:    splat.h $w0, $w0[$1]
 ; N64-NEXT:    mfc1 $1, $f0
 ; N64-NEXT:    jr $ra
@@ -1650,7 +1650,7 @@
 ; N64-NEXT:    ld.w $w0, 0($2)
 ; N64-NEXT:    addv.w $w0, $w0, $w0
 ; N64-NEXT:    ld $1, %got_disp(i32)($1)
-; N64-NEXT:    lw $1, 0($1)
+; N64-NEXT:    lwu $1, 0($1)
 ; N64-NEXT:    splat.w $w0, $w0[$1]
 ; N64-NEXT:    jr $ra
 ; N64-NEXT:    mfc1 $2, $f0
@@ -1722,7 +1722,7 @@
 ; N64-NEXT:    ld.d $w0, 0($2)
 ; N64-NEXT:    addv.d $w0, $w0, $w0
 ; N64-NEXT:    ld $1, %got_disp(i32)($1)
-; N64-NEXT:    lw $1, 0($1)
+; N64-NEXT:    lwu $1, 0($1)
 ; N64-NEXT:    splat.d $w0, $w0[$1]
 ; N64-NEXT:    jr $ra
 ; N64-NEXT:    dmfc1 $2, $f0
@@ -1934,7 +1934,7 @@
 ; N64-NEXT:    daddu $1, $1, $25
 ; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8_vidx)))
 ; N64-NEXT:    ld $2, %got_disp(i32)($1)
-; N64-NEXT:    lw $2, 0($2)
+; N64-NEXT:    lwu $2, 0($2)
 ; N64-NEXT:    ld $1, %got_disp(v16i8)($1)
 ; N64-NEXT:    ld.b $w0, 0($1)
 ; N64-NEXT:    sld.b $w0, $w0[$2]
@@ -1994,7 +1994,7 @@
 ; N64-NEXT:    daddu $1, $1, $25
 ; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16_vidx)))
 ; N64-NEXT:    ld $2, %got_disp(i32)($1)
-; N64-NEXT:    lw $2, 0($2)
+; N64-NEXT:    lwu $2, 0($2)
 ; N64-NEXT:    ld $1, %got_disp(v8i16)($1)
 ; N64-NEXT:    ld.h $w0, 0($1)
 ; N64-NEXT:    dsll $2, $2, 1
@@ -2055,7 +2055,7 @@
 ; N64-NEXT:    daddu $1, $1, $25
 ; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32_vidx)))
 ; N64-NEXT:    ld $2, %got_disp(i32)($1)
-; N64-NEXT:    lw $2, 0($2)
+; N64-NEXT:    lwu $2, 0($2)
 ; N64-NEXT:    ld $1, %got_disp(v4i32)($1)
 ; N64-NEXT:    ld.w $w0, 0($1)
 ; N64-NEXT:    dsll $2, $2, 2
@@ -2124,7 +2124,7 @@
 ; N64-NEXT:    daddu $1, $1, $25
 ; N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64_vidx)))
 ; N64-NEXT:    ld $2, %got_disp(i32)($1)
-; N64-NEXT:    lw $2, 0($2)
+; N64-NEXT:    lwu $2, 0($2)
 ; N64-NEXT:    ld $1, %got_disp(v2i64)($1)
 ; N64-NEXT:    ld.d $w0, 0($1)
 ; N64-NEXT:    dsll $2, $2, 3
diff --git a/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll b/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll
--- a/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll
+++ b/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll
@@ -193,10 +193,9 @@
   ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
   ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
   %4 = extractelement <4 x float> %2, i32 %3
-  ; ALL-DAG: splat.w $w0, [[R1]][[[IDX]]]
+  ; ALL-DAG: splat.w $w0, [[R1]][[[PTR_I]]]
 
   ret float %4
 }
@@ -259,10 +258,9 @@
   ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
   ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
   %4 = extractelement <2 x double> %2, i32 %3
-  ; ALL-DAG: splat.d $w0, [[R1]][[[IDX]]]
+  ; ALL-DAG: splat.d $w0, [[R1]][[[PTR_I]]]
 
   ret double %4
 }
@@ -312,11 +310,10 @@
   ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
   ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
   %3 = insertelement <4 x float> %1, float %a, i32 %2
   ; float argument passed in $f12
-  ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
+  ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[PTR_I]], 2
   ; ALL-DAG: sld.b [[R1]], [[R1]][[[BIDX]]]
   ; ALL-DAG: insve.w [[R1]][0], $w12[0]
   ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
@@ -341,11 +338,10 @@
   ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
   ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
-  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
   %3 = insertelement <2 x double> %1, double %a, i32 %2
   ; double argument passed in $f12
-  ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 3
+  ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[PTR_I]], 3
   ; ALL-DAG: sld.b [[R1]], [[R1]][[[BIDX]]]
   ; ALL-DAG: insve.d [[R1]][0], $w12[0]
   ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
diff --git a/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
--- a/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
@@ -715,7 +715,6 @@
 define double @conv2dlbTestuiVar(<4 x i32> %a, i32 zeroext %elem) {
 ; CHECK-64-LABEL: conv2dlbTestuiVar:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    extsw 3, 3
 ; CHECK-64-NEXT:    rlwinm 3, 3, 2, 28, 29
 ; CHECK-64-NEXT:    vextuwlx 3, 3, 2
 ; CHECK-64-NEXT:    mtfprwz 0, 3
diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll
--- a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll
@@ -5,6 +5,7 @@
 define zeroext i8 @test1(<16 x i8> %a, i32 signext %index) {
 ; CHECK-64-LABEL: test1:
 ; CHECK-64:       # %bb.0: # %entry
+; CHECK-64-NEXT:    clrldi 3, 3, 32
 ; CHECK-64-NEXT:    vextublx 3, 3, 2
 ; CHECK-64-NEXT:    clrldi 3, 3, 56
 ; CHECK-64-NEXT:    blr
@@ -24,6 +25,7 @@
 define signext i8 @test2(<16 x i8> %a, i32 signext %index) {
 ; CHECK-64-LABEL: test2:
 ; CHECK-64:       # %bb.0: # %entry
+; CHECK-64-NEXT:    clrldi 3, 3, 32
 ; CHECK-64-NEXT:    vextublx 3, 3, 2
 ; CHECK-64-NEXT:    extsb 3, 3
 ; CHECK-64-NEXT:    blr
@@ -44,6 +46,7 @@
 define zeroext i16 @test3(<8 x i16> %a, i32 signext %index) {
 ; CHECK-64-LABEL: test3:
 ; CHECK-64:       # %bb.0: # %entry
+; CHECK-64-NEXT:    clrldi 3, 3, 32
 ; CHECK-64-NEXT:    rlwinm 3, 3, 1, 28, 30
 ; CHECK-64-NEXT:    vextuhlx 3, 3, 2
 ; CHECK-64-NEXT:    clrldi 3, 3, 48
@@ -64,6 +67,7 @@
 define signext i16 @test4(<8 x i16> %a, i32 signext %index) {
 ; CHECK-64-LABEL: test4:
 ; CHECK-64:       # %bb.0: # %entry
+; CHECK-64-NEXT:    clrldi 3, 3, 32
 ; CHECK-64-NEXT:    rlwinm 3, 3, 1, 28, 30
 ; CHECK-64-NEXT:    vextuhlx 3, 3, 2
 ; CHECK-64-NEXT:    extsh 3, 3
@@ -84,6 +88,7 @@
 define zeroext i32 @test5(<4 x i32> %a, i32 signext %index) {
 ; CHECK-64-LABEL: test5:
 ; CHECK-64:       # %bb.0: # %entry
+; CHECK-64-NEXT:    clrldi 3, 3, 32
 ; CHECK-64-NEXT:    rlwinm 3, 3, 2, 28, 29
 ; CHECK-64-NEXT:    vextuwlx 3, 3, 2
 ; CHECK-64-NEXT:    blr
@@ -103,6 +108,7 @@
 define signext i32 @test6(<4 x i32> %a, i32 signext %index) {
 ; CHECK-64-LABEL: test6:
 ; CHECK-64:       # %bb.0: # %entry
+; CHECK-64-NEXT:    clrldi 3, 3, 32
 ; CHECK-64-NEXT:    rlwinm 3, 3, 2, 28, 29
 ; CHECK-64-NEXT:    vextuwlx 3, 3, 2
 ; CHECK-64-NEXT:    extsw 3, 3
diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll
--- a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll
@@ -5,6 +5,7 @@
 define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, i8 zeroext %c) {
 ; CHECK-64-LABEL: test_add1:
 ; CHECK-64:       # %bb.0: # %entry
+; CHECK-64-NEXT:    clrldi 3, 3, 32
 ; CHECK-64-NEXT:    vextublx 3, 3, 2
 ; CHECK-64-NEXT:    add 3, 3, 4
 ; CHECK-64-NEXT:    clrldi 3, 3, 56
@@ -31,6 +32,7 @@
 define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) {
 ; CHECK-64-LABEL: test_add2:
 ; CHECK-64:       # %bb.0: # %entry
+; CHECK-64-NEXT:    clrldi 3, 3, 32
 ; CHECK-64-NEXT:    vextublx 3, 3, 2
 ; CHECK-64-NEXT:    add 3, 3, 4
 ; CHECK-64-NEXT:    extsb 3, 3
@@ -57,6 +59,7 @@
 define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) {
 ; CHECK-64-LABEL: test_add3:
 ; CHECK-64:       # %bb.0: # %entry
+; CHECK-64-NEXT:    clrldi 3, 3, 32
 ; CHECK-64-NEXT:    rlwinm 3, 3, 1, 28, 30
 ; CHECK-64-NEXT:    vextuhlx 3, 3, 2
 ; CHECK-64-NEXT:    add 3, 3, 4
@@ -84,6 +87,7 @@
 define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) {
 ; CHECK-64-LABEL: test_add4:
 ; CHECK-64:       # %bb.0: # %entry
+; CHECK-64-NEXT:    clrldi 3, 3, 32
 ; CHECK-64-NEXT:    rlwinm 3, 3, 1, 28, 30
 ; CHECK-64-NEXT:    vextuhlx 3, 3, 2
 ; CHECK-64-NEXT:    add 3, 3, 4
@@ -111,6 +115,7 @@
 define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) {
 ; CHECK-64-LABEL: test_add5:
 ; CHECK-64:       # %bb.0: # %entry
+; CHECK-64-NEXT:    clrldi 3, 3, 32
 ; CHECK-64-NEXT:    rlwinm 3, 3, 2, 28, 29
 ; CHECK-64-NEXT:    vextuwlx 3, 3, 2
 ; CHECK-64-NEXT:    add 3, 3, 4
@@ -134,6 +139,7 @@
 define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) {
 ; CHECK-64-LABEL: test_add6:
 ; CHECK-64:       # %bb.0: # %entry
+; CHECK-64-NEXT:    clrldi 3, 3, 32
 ; CHECK-64-NEXT:    rlwinm 3, 3, 2, 28, 29
 ; CHECK-64-NEXT:    vextuwlx 3, 3, 2
 ; CHECK-64-NEXT:    add 3, 3, 4
diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
--- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
@@ -266,8 +266,8 @@
 define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
 ; CHECK-64-LABEL: testFloat1:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-DAG:     rlwinm 3, 4, 2, 28, 29
-; CHECK-64-DAG:     addi 4, 1, -16
+; CHECK-64-NEXT:    rlwinm 3, 4, 2, 28, 29
+; CHECK-64-NEXT:    addi 4, 1, -16
 ; CHECK-64-NEXT:    stxv 34, -16(1)
 ; CHECK-64-NEXT:    stfsx 1, 4, 3
 ; CHECK-64-NEXT:    lxv 34, -16(1)
@@ -285,8 +285,7 @@
 ; CHECK-64-P10-LABEL: testFloat1:
 ; CHECK-64-P10:       # %bb.0: # %entry
 ; CHECK-64-P10-NEXT:    xscvdpspn 35, 1
-; CHECK-64-P10-NEXT:    extsw 3, 4
-; CHECK-64-P10-NEXT:    slwi 3, 3, 2
+; CHECK-64-P10-NEXT:    slwi 3, 4, 2
 ; CHECK-64-P10-NEXT:    vinswvlx 2, 3, 3
 ; CHECK-64-P10-NEXT:    blr
 ;
@@ -305,16 +304,16 @@
 ; CHECK-64-LABEL: testFloat2:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    lwz 6, 0(3)
-; CHECK-64-DAG:     rlwinm 4, 4, 2, 28, 29
-; CHECK-64-DAG:     addi 7, 1, -16
+; CHECK-64-NEXT:    addi 7, 1, -16
+; CHECK-64-NEXT:    rlwinm 4, 4, 2, 28, 29
 ; CHECK-64-NEXT:    stxv 34, -16(1)
+; CHECK-64-NEXT:    rlwinm 5, 5, 2, 28, 29
 ; CHECK-64-NEXT:    stwx 6, 7, 4
-; CHECK-64-NEXT:    rlwinm 4, 5, 2, 28, 29
-; CHECK-64-NEXT:    addi 5, 1, -32
+; CHECK-64-NEXT:    addi 4, 1, -32
 ; CHECK-64-NEXT:    lxv 0, -16(1)
 ; CHECK-64-NEXT:    lwz 3, 1(3)
 ; CHECK-64-NEXT:    stxv 0, -32(1)
-; CHECK-64-NEXT:    stwx 3, 5, 4
+; CHECK-64-NEXT:    stwx 3, 4, 5
 ; CHECK-64-NEXT:    lxv 34, -32(1)
 ; CHECK-64-NEXT:    blr
 ;
@@ -337,12 +336,10 @@
 ; CHECK-64-P10-LABEL: testFloat2:
 ; CHECK-64-P10:       # %bb.0: # %entry
 ; CHECK-64-P10-NEXT:    lwz 6, 0(3)
-; CHECK-64-P10-NEXT:    extsw 4, 4
 ; CHECK-64-P10-NEXT:    lwz 3, 1(3)
 ; CHECK-64-P10-NEXT:    slwi 4, 4, 2
 ; CHECK-64-P10-NEXT:    vinswlx 2, 4, 6
-; CHECK-64-P10-NEXT:    extsw 4, 5
-; CHECK-64-P10-NEXT:    slwi 4, 4, 2
+; CHECK-64-P10-NEXT:    slwi 4, 5, 2
 ; CHECK-64-P10-NEXT:    vinswlx 2, 4, 3
 ; CHECK-64-P10-NEXT:    blr
 ;
@@ -370,8 +367,9 @@
 ; CHECK-64-LABEL: testFloat3:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    lis 6, 1
-; CHECK-64-DAG:         rlwinm 4, 4, 2, 28, 29
-; CHECK-64-DAG:    addi 7, 1, -16
+; CHECK-64-NEXT:    addi 7, 1, -16
+; CHECK-64-NEXT:    rlwinm 4, 4, 2, 28, 29
+; CHECK-64-NEXT:    rlwinm 5, 5, 2, 28, 29
 ; CHECK-64-NEXT:    lwzx 6, 3, 6
 ; CHECK-64-NEXT:    stxv 34, -16(1)
 ; CHECK-64-NEXT:    stwx 6, 7, 4
@@ -379,10 +377,9 @@
 ; CHECK-64-NEXT:    lxv 0, -16(1)
 ; CHECK-64-NEXT:    rldic 4, 4, 36, 27
 ; CHECK-64-NEXT:    lwzx 3, 3, 4
-; CHECK-64-NEXT:    rlwinm 4, 5, 2, 28, 29
-; CHECK-64-NEXT:    addi 5, 1, -32
+; CHECK-64-NEXT:    addi 4, 1, -32
 ; CHECK-64-NEXT:    stxv 0, -32(1)
-; CHECK-64-NEXT:    stwx 3, 5, 4
+; CHECK-64-NEXT:    stwx 3, 4, 5
 ; CHECK-64-NEXT:    lxv 34, -32(1)
 ; CHECK-64-NEXT:    blr
 ;
@@ -406,14 +403,12 @@
 ; CHECK-64-P10-LABEL: testFloat3:
 ; CHECK-64-P10:       # %bb.0: # %entry
 ; CHECK-64-P10-NEXT:    plwz 6, 65536(3), 0
-; CHECK-64-P10-NEXT:    extsw 4, 4
 ; CHECK-64-P10-NEXT:    slwi 4, 4, 2
 ; CHECK-64-P10-NEXT:    vinswlx 2, 4, 6
 ; CHECK-64-P10-NEXT:    li 4, 1
 ; CHECK-64-P10-NEXT:    rldic 4, 4, 36, 27
 ; CHECK-64-P10-NEXT:    lwzx 3, 3, 4
-; CHECK-64-P10-NEXT:    extsw 4, 5
-; CHECK-64-P10-NEXT:    slwi 4, 4, 2
+; CHECK-64-P10-NEXT:    slwi 4, 5, 2
 ; CHECK-64-P10-NEXT:    vinswlx 2, 4, 3
 ; CHECK-64-P10-NEXT:    blr
 ;
@@ -580,7 +575,7 @@
 define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) {
 ; CHECK-64-LABEL: testDouble1:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64:         rlwinm 3, 4, 3, 28, 28
+; CHECK-64-NEXT:    rlwinm 3, 4, 3, 28, 28
 ; CHECK-64-NEXT:    addi 4, 1, -16
 ; CHECK-64-NEXT:    stxv 34, -16(1)
 ; CHECK-64-NEXT:    stfdx 1, 4, 3
@@ -598,7 +593,6 @@
 ;
 ; CHECK-64-P10-LABEL: testDouble1:
 ; CHECK-64-P10:       # %bb.0: # %entry
-; CHECK-64-P10-NEXT:    extsw 4, 4
 ; CHECK-64-P10-NEXT:    mffprd 3, 1
 ; CHECK-64-P10-NEXT:    rlwinm 4, 4, 3, 0, 28
 ; CHECK-64-P10-NEXT:    vinsdlx 2, 4, 3
@@ -606,8 +600,8 @@
 ;
 ; CHECK-32-P10-LABEL: testDouble1:
 ; CHECK-32-P10:       # %bb.0: # %entry
-; CHECK-32-P10-DAG:     addi 4, 1, -16
-; CHECK-32-P10-DAG:     rlwinm 3, 5, 3, 28, 28
+; CHECK-32-P10-NEXT:    rlwinm 3, 5, 3, 28, 28
+; CHECK-32-P10-NEXT:    addi 4, 1, -16
 ; CHECK-32-P10-NEXT:    stxv 34, -16(1)
 ; CHECK-32-P10-NEXT:    stfdx 1, 4, 3
 ; CHECK-32-P10-NEXT:    lxv 34, -16(1)
@@ -621,17 +615,17 @@
 ; CHECK-64-LABEL: testDouble2:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    ld 6, 0(3)
-; CHECK-64-DAG:         rlwinm 4, 4, 3, 28, 28
-; CHECK-64-DAG:    addi 7, 1, -32
+; CHECK-64-NEXT:    addi 7, 1, -32
+; CHECK-64-NEXT:    rlwinm 4, 4, 3, 28, 28
 ; CHECK-64-NEXT:    stxv 34, -32(1)
+; CHECK-64-NEXT:    rlwinm 5, 5, 3, 28, 28
 ; CHECK-64-NEXT:    stdx 6, 7, 4
 ; CHECK-64-NEXT:    li 4, 1
 ; CHECK-64-NEXT:    lxv 0, -32(1)
 ; CHECK-64-NEXT:    ldx 3, 3, 4
-; CHECK-64-NEXT:    rlwinm 4, 5, 3, 28, 28
-; CHECK-64-NEXT:    addi 5, 1, -16
+; CHECK-64-NEXT:    addi 4, 1, -16
 ; CHECK-64-NEXT:    stxv 0, -16(1)
-; CHECK-64-NEXT:    stdx 3, 5, 4
+; CHECK-64-NEXT:    stdx 3, 4, 5
 ; CHECK-64-NEXT:    lxv 34, -16(1)
 ; CHECK-64-NEXT:    blr
 ;
@@ -654,20 +648,18 @@
 ; CHECK-64-P10-LABEL: testDouble2:
 ; CHECK-64-P10:       # %bb.0: # %entry
 ; CHECK-64-P10-NEXT:    ld 6, 0(3)
-; CHECK-64-P10-NEXT:    extsw 4, 4
 ; CHECK-64-P10-NEXT:    pld 3, 1(3), 0
 ; CHECK-64-P10-NEXT:    rlwinm 4, 4, 3, 0, 28
 ; CHECK-64-P10-NEXT:    vinsdlx 2, 4, 6
-; CHECK-64-P10-NEXT:    extsw 4, 5
-; CHECK-64-P10-NEXT:    rlwinm 4, 4, 3, 0, 28
+; CHECK-64-P10-NEXT:    rlwinm 4, 5, 3, 0, 28
 ; CHECK-64-P10-NEXT:    vinsdlx 2, 4, 3
 ; CHECK-64-P10-NEXT:    blr
 ;
 ; CHECK-32-P10-LABEL: testDouble2:
 ; CHECK-32-P10:       # %bb.0: # %entry
 ; CHECK-32-P10-NEXT:    lfd 0, 0(3)
-; CHECK-32-P10-DAG:     addi 6, 1, -32
-; CHECK-32-P10-DAG:     rlwinm 4, 4, 3, 28, 28
+; CHECK-32-P10-NEXT:    rlwinm 4, 4, 3, 28, 28
+; CHECK-32-P10-NEXT:    addi 6, 1, -32
 ; CHECK-32-P10-NEXT:    stxv 34, -32(1)
 ; CHECK-32-P10-NEXT:    rlwinm 5, 5, 3, 28, 28
 ; CHECK-32-P10-NEXT:    stfdx 0, 6, 4
@@ -693,8 +685,9 @@
 ; CHECK-64-LABEL: testDouble3:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    lis 6, 1
-; CHECK-64-DAG:     rlwinm 4, 4, 3, 28, 28
-; CHECK-64-DAG:     addi 7, 1, -32
+; CHECK-64-NEXT:    addi 7, 1, -32
+; CHECK-64-NEXT:    rlwinm 4, 4, 3, 28, 28
+; CHECK-64-NEXT:    rlwinm 5, 5, 3, 28, 28
 ; CHECK-64-NEXT:    ldx 6, 3, 6
 ; CHECK-64-NEXT:    stxv 34, -32(1)
 ; CHECK-64-NEXT:    stdx 6, 7, 4
@@ -702,10 +695,9 @@
 ; CHECK-64-NEXT:    lxv 0, -32(1)
 ; CHECK-64-NEXT:    rldic 4, 4, 36, 27
 ; CHECK-64-NEXT:    ldx 3, 3, 4
-; CHECK-64-NEXT:    rlwinm 4, 5, 3, 28, 28
-; CHECK-64-NEXT:    addi 5, 1, -16
+; CHECK-64-NEXT:    addi 4, 1, -16
 ; CHECK-64-NEXT:    stxv 0, -16(1)
-; CHECK-64-NEXT:    stdx 3, 5, 4
+; CHECK-64-NEXT:    stdx 3, 4, 5
 ; CHECK-64-NEXT:    lxv 34, -16(1)
 ; CHECK-64-NEXT:    blr
 ;
@@ -729,22 +721,20 @@
 ; CHECK-64-P10-LABEL: testDouble3:
 ; CHECK-64-P10:       # %bb.0: # %entry
 ; CHECK-64-P10-NEXT:    pld 6, 65536(3), 0
-; CHECK-64-P10-NEXT:    extsw 4, 4
 ; CHECK-64-P10-NEXT:    rlwinm 4, 4, 3, 0, 28
 ; CHECK-64-P10-NEXT:    vinsdlx 2, 4, 6
 ; CHECK-64-P10-NEXT:    li 4, 1
 ; CHECK-64-P10-NEXT:    rldic 4, 4, 36, 27
 ; CHECK-64-P10-NEXT:    ldx 3, 3, 4
-; CHECK-64-P10-NEXT:    extsw 4, 5
-; CHECK-64-P10-NEXT:    rlwinm 4, 4, 3, 0, 28
+; CHECK-64-P10-NEXT:    rlwinm 4, 5, 3, 0, 28
 ; CHECK-64-P10-NEXT:    vinsdlx 2, 4, 3
 ; CHECK-64-P10-NEXT:    blr
 ;
 ; CHECK-32-P10-LABEL: testDouble3:
 ; CHECK-32-P10:       # %bb.0: # %entry
 ; CHECK-32-P10-NEXT:    plfd 0, 65536(3), 0
-; CHECK-32-P10-DAG:     addi 6, 1, -32
-; CHECK-32-P10-DAG:     rlwinm 4, 4, 3, 28, 28
+; CHECK-32-P10-NEXT:    rlwinm 4, 4, 3, 28, 28
+; CHECK-32-P10-NEXT:    addi 6, 1, -32
 ; CHECK-32-P10-NEXT:    stxv 34, -32(1)
 ; CHECK-32-P10-NEXT:    rlwinm 5, 5, 3, 28, 28
 ; CHECK-32-P10-NEXT:    stfdx 0, 6, 4
diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
--- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -1112,10 +1112,11 @@
 define signext i8 @getvelsc(<16 x i8> %vsc, i32 signext %i) {
 ; CHECK-LABEL: getvelsc:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    andi. r4, r5, 8
+; CHECK-NEXT:    clrldi r4, r5, 32
 ; CHECK-NEXT:    li r3, 7
-; CHECK-NEXT:    lvsl v3, 0, r4
-; CHECK-NEXT:    andc r3, r3, r5
+; CHECK-NEXT:    andi. r5, r4, 8
+; CHECK-NEXT:    andc r3, r3, r4
+; CHECK-NEXT:    lvsl v3, 0, r5
 ; CHECK-NEXT:    sldi r3, r3, 3
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    mfvsrd r4, v2
@@ -1126,10 +1127,11 @@
 ; CHECK-LE-LABEL: getvelsc:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    li r3, 8
-; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    clrldi r4, r5, 32
+; CHECK-LE-NEXT:    andc r3, r3, r4
 ; CHECK-LE-NEXT:    lvsl v3, 0, r3
 ; CHECK-LE-NEXT:    li r3, 7
-; CHECK-LE-NEXT:    and r3, r3, r5
+; CHECK-LE-NEXT:    and r3, r3, r4
 ; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-LE-NEXT:    sldi r3, r3, 3
 ; CHECK-LE-NEXT:    mfvsrd r4, v2
@@ -1139,10 +1141,11 @@
 ;
 ; CHECK-AIX-LABEL: getvelsc:
 ; CHECK-AIX:       # %bb.0: # %entry
-; CHECK-AIX-NEXT:    andi. 5, 3, 8
+; CHECK-AIX-NEXT:    clrldi 3, 3, 32
 ; CHECK-AIX-NEXT:    li 4, 7
-; CHECK-AIX-NEXT:    lvsl 3, 0, 5
+; CHECK-AIX-NEXT:    andi. 5, 3, 8
 ; CHECK-AIX-NEXT:    andc 3, 4, 3
+; CHECK-AIX-NEXT:    lvsl 3, 0, 5
 ; CHECK-AIX-NEXT:    sldi 3, 3, 3
 ; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-AIX-NEXT:    mfvsrd 4, 34
@@ -1160,10 +1163,11 @@
 define zeroext i8 @getveluc(<16 x i8> %vuc, i32 signext %i) {
 ; CHECK-LABEL: getveluc:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    andi. r4, r5, 8
+; CHECK-NEXT:    clrldi r4, r5, 32
 ; CHECK-NEXT:    li r3, 7
-; CHECK-NEXT:    lvsl v3, 0, r4
-; CHECK-NEXT:    andc r3, r3, r5
+; CHECK-NEXT:    andi. r5, r4, 8
+; CHECK-NEXT:    andc r3, r3, r4
+; CHECK-NEXT:    lvsl v3, 0, r5
 ; CHECK-NEXT:    sldi r3, r3, 3
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    mfvsrd r4, v2
@@ -1174,10 +1178,11 @@
 ; CHECK-LE-LABEL: getveluc:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    li r3, 8
-; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    clrldi r4, r5, 32
+; CHECK-LE-NEXT:    andc r3, r3, r4
 ; CHECK-LE-NEXT:    lvsl v3, 0, r3
 ; CHECK-LE-NEXT:    li r3, 7
-; CHECK-LE-NEXT:    and r3, r3, r5
+; CHECK-LE-NEXT:    and r3, r3, r4
 ; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-LE-NEXT:    sldi r3, r3, 3
 ; CHECK-LE-NEXT:    mfvsrd r4, v2
@@ -1187,10 +1192,11 @@
 ;
 ; CHECK-AIX-LABEL: getveluc:
 ; CHECK-AIX:       # %bb.0: # %entry
-; CHECK-AIX-NEXT:    andi. 5, 3, 8
+; CHECK-AIX-NEXT:    clrldi 3, 3, 32
 ; CHECK-AIX-NEXT:    li 4, 7
-; CHECK-AIX-NEXT:    lvsl 3, 0, 5
+; CHECK-AIX-NEXT:    andi. 5, 3, 8
 ; CHECK-AIX-NEXT:    andc 3, 4, 3
+; CHECK-AIX-NEXT:    lvsl 3, 0, 5
 ; CHECK-AIX-NEXT:    sldi 3, 3, 3
 ; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-AIX-NEXT:    mfvsrd 4, 34
@@ -1672,12 +1678,13 @@
 define signext i16 @getvelss(<8 x i16> %vss, i32 signext %i) {
 ; CHECK-LABEL: getvelss:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    andi. r4, r5, 4
+; CHECK-NEXT:    clrldi r4, r5, 32
 ; CHECK-NEXT:    li r3, 3
-; CHECK-NEXT:    sldi r4, r4, 1
-; CHECK-NEXT:    andc r3, r3, r5
-; CHECK-NEXT:    lvsl v3, 0, r4
+; CHECK-NEXT:    andi. r5, r4, 4
+; CHECK-NEXT:    andc r3, r3, r4
+; CHECK-NEXT:    sldi r5, r5, 1
 ; CHECK-NEXT:    sldi r3, r3, 4
+; CHECK-NEXT:    lvsl v3, 0, r5
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    mfvsrd r4, v2
 ; CHECK-NEXT:    srd r3, r4, r3
@@ -1687,11 +1694,12 @@
 ; CHECK-LE-LABEL: getvelss:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    li r3, 4
-; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    clrldi r4, r5, 32
+; CHECK-LE-NEXT:    andc r3, r3, r4
 ; CHECK-LE-NEXT:    sldi r3, r3, 1
 ; CHECK-LE-NEXT:    lvsl v3, 0, r3
 ; CHECK-LE-NEXT:    li r3, 3
-; CHECK-LE-NEXT:    and r3, r3, r5
+; CHECK-LE-NEXT:    and r3, r3, r4
 ; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-LE-NEXT:    sldi r3, r3, 4
 ; CHECK-LE-NEXT:    mfvsrd r4, v2
@@ -1701,12 +1709,13 @@
 ;
 ; CHECK-AIX-LABEL: getvelss:
 ; CHECK-AIX:       # %bb.0: # %entry
-; CHECK-AIX-NEXT:    andi. 5, 3, 4
+; CHECK-AIX-NEXT:    clrldi 3, 3, 32
 ; CHECK-AIX-NEXT:    li 4, 3
-; CHECK-AIX-NEXT:    sldi 5, 5, 1
+; CHECK-AIX-NEXT:    andi. 5, 3, 4
 ; CHECK-AIX-NEXT:    andc 3, 4, 3
-; CHECK-AIX-NEXT:    lvsl 3, 0, 5
+; CHECK-AIX-NEXT:    sldi 5, 5, 1
 ; CHECK-AIX-NEXT:    sldi 3, 3, 4
+; CHECK-AIX-NEXT:    lvsl 3, 0, 5
 ; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-AIX-NEXT:    mfvsrd 4, 34
 ; CHECK-AIX-NEXT:    srd 3, 4, 3
@@ -1723,12 +1732,13 @@
 define zeroext i16 @getvelus(<8 x i16> %vus, i32 signext %i) {
 ; CHECK-LABEL: getvelus:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    andi. r4, r5, 4
+; CHECK-NEXT:    clrldi r4, r5, 32
 ; CHECK-NEXT:    li r3, 3
-; CHECK-NEXT:    sldi r4, r4, 1
-; CHECK-NEXT:    andc r3, r3, r5
-; CHECK-NEXT:    lvsl v3, 0, r4
+; CHECK-NEXT:    andi. r5, r4, 4
+; CHECK-NEXT:    andc r3, r3, r4
+; CHECK-NEXT:    sldi r5, r5, 1
 ; CHECK-NEXT:    sldi r3, r3, 4
+; CHECK-NEXT:    lvsl v3, 0, r5
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    mfvsrd r4, v2
 ; CHECK-NEXT:    srd r3, r4, r3
@@ -1738,11 +1748,12 @@
 ; CHECK-LE-LABEL: getvelus:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    li r3, 4
-; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    clrldi r4, r5, 32
+; CHECK-LE-NEXT:    andc r3, r3, r4
 ; CHECK-LE-NEXT:    sldi r3, r3, 1
 ; CHECK-LE-NEXT:    lvsl v3, 0, r3
 ; CHECK-LE-NEXT:    li r3, 3
-; CHECK-LE-NEXT:    and r3, r3, r5
+; CHECK-LE-NEXT:    and r3, r3, r4
 ; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-LE-NEXT:    sldi r3, r3, 4
 ; CHECK-LE-NEXT:    mfvsrd r4, v2
@@ -1752,12 +1763,13 @@
 ;
 ; CHECK-AIX-LABEL: getvelus:
 ; CHECK-AIX:       # %bb.0: # %entry
-; CHECK-AIX-NEXT:    andi. 5, 3, 4
+; CHECK-AIX-NEXT:    clrldi 3, 3, 32
 ; CHECK-AIX-NEXT:    li 4, 3
-; CHECK-AIX-NEXT:    sldi 5, 5, 1
+; CHECK-AIX-NEXT:    andi. 5, 3, 4
 ; CHECK-AIX-NEXT:    andc 3, 4, 3
-; CHECK-AIX-NEXT:    lvsl 3, 0, 5
+; CHECK-AIX-NEXT:    sldi 5, 5, 1
 ; CHECK-AIX-NEXT:    sldi 3, 3, 4
+; CHECK-AIX-NEXT:    lvsl 3, 0, 5
 ; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-AIX-NEXT:    mfvsrd 4, 34
 ; CHECK-AIX-NEXT:    srd 3, 4, 3
@@ -1988,12 +2000,13 @@
 define signext i32 @getvelsi(<4 x i32> %vsi, i32 signext %i) {
 ; CHECK-LABEL: getvelsi:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    andi. r4, r5, 2
+; CHECK-NEXT:    clrldi r4, r5, 32
 ; CHECK-NEXT:    li r3, 1
-; CHECK-NEXT:    sldi r4, r4, 2
-; CHECK-NEXT:    andc r3, r3, r5
-; CHECK-NEXT:    lvsl v3, 0, r4
+; CHECK-NEXT:    andi. r5, r4, 2
+; CHECK-NEXT:    andc r3, r3, r4
+; CHECK-NEXT:    sldi r5, r5, 2
 ; CHECK-NEXT:    sldi r3, r3, 5
+; CHECK-NEXT:    lvsl v3, 0, r5
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    mfvsrd r4, v2
 ; CHECK-NEXT:    srd r3, r4, r3
@@ -2003,11 +2016,12 @@
 ; CHECK-LE-LABEL: getvelsi:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    li r3, 2
-; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    clrldi r4, r5, 32
+; CHECK-LE-NEXT:    andc r3, r3, r4
 ; CHECK-LE-NEXT:    sldi r3, r3, 2
 ; CHECK-LE-NEXT:    lvsl v3, 0, r3
 ; CHECK-LE-NEXT:    li r3, 1
-; CHECK-LE-NEXT:    and r3, r3, r5
+; CHECK-LE-NEXT:    and r3, r3, r4
 ; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-LE-NEXT:    sldi r3, r3, 5
 ; CHECK-LE-NEXT:    mfvsrd r4, v2
@@ -2017,12 +2031,13 @@
 ;
 ; CHECK-AIX-LABEL: getvelsi:
 ; CHECK-AIX:       # %bb.0: # %entry
-; CHECK-AIX-NEXT:    andi. 5, 3, 2
+; CHECK-AIX-NEXT:    clrldi 3, 3, 32
 ; CHECK-AIX-NEXT:    li 4, 1
-; CHECK-AIX-NEXT:    sldi 5, 5, 2
+; CHECK-AIX-NEXT:    andi. 5, 3, 2
 ; CHECK-AIX-NEXT:    andc 3, 4, 3
-; CHECK-AIX-NEXT:    lvsl 3, 0, 5
+; CHECK-AIX-NEXT:    sldi 5, 5, 2
 ; CHECK-AIX-NEXT:    sldi 3, 3, 5
+; CHECK-AIX-NEXT:    lvsl 3, 0, 5
 ; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-AIX-NEXT:    mfvsrd 4, 34
 ; CHECK-AIX-NEXT:    srd 3, 4, 3
@@ -2038,12 +2053,13 @@
 define zeroext i32 @getvelui(<4 x i32> %vui, i32 signext %i) {
 ; CHECK-LABEL: getvelui:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    andi. r4, r5, 2
+; CHECK-NEXT:    clrldi r4, r5, 32
 ; CHECK-NEXT:    li r3, 1
-; CHECK-NEXT:    sldi r4, r4, 2
-; CHECK-NEXT:    andc r3, r3, r5
-; CHECK-NEXT:    lvsl v3, 0, r4
+; CHECK-NEXT:    andi. r5, r4, 2
+; CHECK-NEXT:    andc r3, r3, r4
+; CHECK-NEXT:    sldi r5, r5, 2
 ; CHECK-NEXT:    sldi r3, r3, 5
+; CHECK-NEXT:    lvsl v3, 0, r5
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    mfvsrd r4, v2
 ; CHECK-NEXT:    srd r3, r4, r3
@@ -2053,11 +2069,12 @@
 ; CHECK-LE-LABEL: getvelui:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    li r3, 2
-; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    clrldi r4, r5, 32
+; CHECK-LE-NEXT:    andc r3, r3, r4
 ; CHECK-LE-NEXT:    sldi r3, r3, 2
 ; CHECK-LE-NEXT:    lvsl v3, 0, r3
 ; CHECK-LE-NEXT:    li r3, 1
-; CHECK-LE-NEXT:    and r3, r3, r5
+; CHECK-LE-NEXT:    and r3, r3, r4
 ; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-LE-NEXT:    sldi r3, r3, 5
 ; CHECK-LE-NEXT:    mfvsrd r4, v2
@@ -2067,12 +2084,13 @@
 ;
 ; CHECK-AIX-LABEL: getvelui:
 ; CHECK-AIX:       # %bb.0: # %entry
-; CHECK-AIX-NEXT:    andi. 5, 3, 2
+; CHECK-AIX-NEXT:    clrldi 3, 3, 32
 ; CHECK-AIX-NEXT:    li 4, 1
-; CHECK-AIX-NEXT:    sldi 5, 5, 2
+; CHECK-AIX-NEXT:    andi. 5, 3, 2
 ; CHECK-AIX-NEXT:    andc 3, 4, 3
-; CHECK-AIX-NEXT:    lvsl 3, 0, 5
+; CHECK-AIX-NEXT:    sldi 5, 5, 2
 ; CHECK-AIX-NEXT:    sldi 3, 3, 5
+; CHECK-AIX-NEXT:    lvsl 3, 0, 5
 ; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-AIX-NEXT:    mfvsrd 4, 34
 ; CHECK-AIX-NEXT:    srd 3, 4, 3
@@ -2186,7 +2204,8 @@
 define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) {
 ; CHECK-LABEL: getvelsl:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    andi. r3, r5, 1
+; CHECK-NEXT:    clrldi r3, r5, 32
+; CHECK-NEXT:    andi. r3, r3, 1
 ; CHECK-NEXT:    sldi r3, r3, 3
 ; CHECK-NEXT:    lvsl v3, 0, r3
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
@@ -2196,7 +2215,8 @@
 ; CHECK-LE-LABEL: getvelsl:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    li r3, 1
-; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    clrldi r4, r5, 32
+; CHECK-LE-NEXT:    andc r3, r3, r4
 ; CHECK-LE-NEXT:    sldi r3, r3, 3
 ; CHECK-LE-NEXT:    lvsl v3, 0, r3
 ; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
@@ -2205,6 +2225,7 @@
 ;
 ; CHECK-AIX-LABEL: getvelsl:
 ; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    clrldi 3, 3, 32
 ; CHECK-AIX-NEXT:    andi. 3, 3, 1
 ; CHECK-AIX-NEXT:    sldi 3, 3, 3
 ; CHECK-AIX-NEXT:    lvsl 3, 0, 3
@@ -2221,7 +2242,8 @@
 define i64 @getvelul(<2 x i64> %vul, i32 signext %i) {
 ; CHECK-LABEL: getvelul:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    andi. r3, r5, 1
+; CHECK-NEXT:    clrldi r3, r5, 32
+; CHECK-NEXT:    andi. r3, r3, 1
 ; CHECK-NEXT:    sldi r3, r3, 3
 ; CHECK-NEXT:    lvsl v3, 0, r3
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
@@ -2231,7 +2253,8 @@
 ; CHECK-LE-LABEL: getvelul:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    li r3, 1
-; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    clrldi r4, r5, 32
+; CHECK-LE-NEXT:    andc r3, r3, r4
 ; CHECK-LE-NEXT:    sldi r3, r3, 3
 ; CHECK-LE-NEXT:    lvsl v3, 0, r3
 ; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
@@ -2240,6 +2263,7 @@
 ;
 ; CHECK-AIX-LABEL: getvelul:
 ; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    clrldi 3, 3, 32
 ; CHECK-AIX-NEXT:    andi. 3, 3, 1
 ; CHECK-AIX-NEXT:    sldi 3, 3, 3
 ; CHECK-AIX-NEXT:    lvsl 3, 0, 3
@@ -2357,7 +2381,7 @@
 define float @getvelf(<4 x float> %vf, i32 signext %i) {
 ; CHECK-LABEL: getvelf:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sldi r3, r5, 2
+; CHECK-NEXT:    rldic r3, r5, 2, 30
 ; CHECK-NEXT:    lvsl v3, 0, r3
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    xscvspdpn f1, v2
@@ -2365,7 +2389,8 @@
 ;
 ; CHECK-LE-LABEL: getvelf:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xori r3, r5, 3
+; CHECK-LE-NEXT:    clrldi r3, r5, 32
+; CHECK-LE-NEXT:    xori r3, r3, 3
 ; CHECK-LE-NEXT:    sldi r3, r3, 2
 ; CHECK-LE-NEXT:    lvsl v3, 0, r3
 ; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
@@ -2374,7 +2399,7 @@
 ;
 ; CHECK-AIX-LABEL: getvelf:
 ; CHECK-AIX:       # %bb.0: # %entry
-; CHECK-AIX-NEXT:    sldi 3, 3, 2
+; CHECK-AIX-NEXT:    rldic 3, 3, 2, 30
 ; CHECK-AIX-NEXT:    lvsl 3, 0, 3
 ; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-AIX-NEXT:    xscvspdpn 1, 34
@@ -2436,7 +2461,8 @@
 define double @getveld(<2 x double> %vd, i32 signext %i) {
 ; CHECK-LABEL: getveld:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    andi. r3, r5, 1
+; CHECK-NEXT:    clrldi r3, r5, 32
+; CHECK-NEXT:    andi. r3, r3, 1
 ; CHECK-NEXT:    sldi r3, r3, 3
 ; CHECK-NEXT:    lvsl v3, 0, r3
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
@@ -2447,7 +2473,8 @@
 ; CHECK-LE-LABEL: getveld:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    li r3, 1
-; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    clrldi r4, r5, 32
+; CHECK-LE-NEXT:    andc r3, r3, r4
 ; CHECK-LE-NEXT:    sldi r3, r3, 3
 ; CHECK-LE-NEXT:    lvsl v3, 0, r3
 ; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
@@ -2457,6 +2484,7 @@
 ;
 ; CHECK-AIX-LABEL: getveld:
 ; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    clrldi 3, 3, 32
 ; CHECK-AIX-NEXT:    andi. 3, 3, 1
 ; CHECK-AIX-NEXT:    sldi 3, 3, 3
 ; CHECK-AIX-NEXT:    lvsl 3, 0, 3
diff --git a/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll b/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll
--- a/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll
+++ b/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -ppc-late-peephole=true < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
 ; RUN:  --check-prefix=CHECK-BE
@@ -6,109 +7,152 @@
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i32 @geti(<4 x i32> %a, i32 signext %b) {
+; CHECK-LABEL: geti:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li 3, 2
+; CHECK-NEXT:    clrldi 4, 5, 32
+; CHECK-NEXT:    andc 3, 3, 4
+; CHECK-NEXT:    sldi 3, 3, 2
+; CHECK-NEXT:    lvsl 3, 0, 3
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    and 3, 3, 4
+; CHECK-NEXT:    vperm 2, 2, 2, 3
+; CHECK-NEXT:    sldi 3, 3, 5
+; CHECK-NEXT:    mfvsrd 4, 34
+; CHECK-NEXT:    srd 3, 4, 3
+; CHECK-NEXT:    extsw 3, 3
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: geti:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    clrldi 4, 5, 32
+; CHECK-BE-NEXT:    li 3, 1
+; CHECK-BE-NEXT:    andi. 5, 4, 2
+; CHECK-BE-NEXT:    andc 3, 3, 4
+; CHECK-BE-NEXT:    sldi 5, 5, 2
+; CHECK-BE-NEXT:    sldi 3, 3, 5
+; CHECK-BE-NEXT:    lvsl 3, 0, 5
+; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    mfvsrd 4, 34
+; CHECK-BE-NEXT:    srd 3, 4, 3
+; CHECK-BE-NEXT:    extsw 3, 3
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-P7-LABEL: geti:
+; CHECK-P7:       # %bb.0: # %entry
+; CHECK-P7-NEXT:    addi 3, 1, -16
+; CHECK-P7-NEXT:    rlwinm 4, 5, 2, 28, 29
+; CHECK-P7-NEXT:    stxvw4x 34, 0, 3
+; CHECK-P7-NEXT:    lwax 3, 3, 4
+; CHECK-P7-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %a, i32 %b
   ret i32 %vecext
-; CHECK-LABEL: @geti
-; CHECK-P7-LABEL: @geti
-; CHECK-BE-LABEL: @geti
-; CHECK-DAG: li [[TRUNCREG:[0-9]+]], 2
-; CHECK-DAG: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5
-; CHECK-DAG: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 2
-; CHECK-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]]
-; CHECK-DAG: vperm [[PERMVEC:[0-9]+]], 2, 2, [[SHMSKREG]]
-; CHECK-DAG: li [[ONEREG:[0-9]+]], 1
-; CHECK-DAG: and [[ELEMSREG:[0-9]+]], [[ONEREG]], 5
-; CHECK-DAG: sldi [[SHAMREG:[0-9]+]], [[ELEMSREG]], 5
-; CHECK: mfvsrd [[TOGPR:[0-9]+]],
-; CHECK: srd [[RSHREG:[0-9]+]], [[TOGPR]], [[SHAMREG]]
-; CHECK: extsw 3, [[RSHREG]]
-; CHECK-P7-DAG: rlwinm [[ELEMOFFREG:[0-9]+]], 5, 2, 28, 29
-; CHECK-P7-DAG: stxvw4x 34,
-; CHECK-P7: lwax 3, 3, [[ELEMOFFREG]]
-; CHECK-BE-DAG: andi. [[ANDREG:[0-9]+]], 5, 2
-; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 2
-; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]]
-; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
-; CHECK-BE-DAG: li [[IMMREG:[0-9]+]], 1
-; CHECK-BE-DAG: andc [[ANDCREG:[0-9]+]], [[IMMREG]], 5
-; CHECK-BE-DAG: sldi [[SHAMREG:[0-9]+]], [[ANDCREG]], 5
-; CHECK-BE: mfvsrd [[TOGPR:[0-9]+]],
-; CHECK-BE: srd [[RSHREG:[0-9]+]], [[TOGPR]], [[SHAMREG]]
-; CHECK-BE: extsw 3, [[RSHREG]]
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define i64 @getl(<2 x i64> %a, i32 signext %b) {
+; CHECK-LABEL: getl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    clrldi 4, 5, 32
+; CHECK-NEXT:    andc 3, 3, 4
+; CHECK-NEXT:    sldi 3, 3, 3
+; CHECK-NEXT:    lvsl 3, 0, 3
+; CHECK-NEXT:    vperm 2, 2, 2, 3
+; CHECK-NEXT:    mfvsrd 3, 34
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: getl:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    clrldi 3, 5, 32
+; CHECK-BE-NEXT:    andi. 3, 3, 1
+; CHECK-BE-NEXT:    sldi 3, 3, 3
+; CHECK-BE-NEXT:    lvsl 3, 0, 3
+; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    mfvsrd 3, 34
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-P7-LABEL: getl:
+; CHECK-P7:       # %bb.0: # %entry
+; CHECK-P7-NEXT:    addi 3, 1, -16
+; CHECK-P7-NEXT:    rlwinm 4, 5, 3, 28, 28
+; CHECK-P7-NEXT:    stxvd2x 34, 0, 3
+; CHECK-P7-NEXT:    ldx 3, 3, 4
+; CHECK-P7-NEXT:    blr
 entry:
   %vecext = extractelement <2 x i64> %a, i32 %b
   ret i64 %vecext
-; CHECK-LABEL: @getl
-; CHECK-P7-LABEL: @getl
-; CHECK-BE-LABEL: @getl
-; CHECK-DAG: li [[TRUNCREG:[0-9]+]], 1
-; CHECK-DAG: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5
-; CHECK-DAG: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 3
-; CHECK-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]]
-; CHECK-DAG: vperm [[PERMVEC:[0-9]+]], 2, 2, [[SHMSKREG]]
-; CHECK: mfvsrd 3,
-; CHECK-P7-DAG: rlwinm [[ELEMOFFREG:[0-9]+]], 5, 3, 28, 28
-; CHECK-P7-DAG: stxvd2x 34,
-; CHECK-P7: ldx 3, 3, [[ELEMOFFREG]]
-; CHECK-BE-DAG: andi. [[ANDREG:[0-9]+]], 5, 1
-; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3
-; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]]
-; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
-; CHECK-BE: mfvsrd 3,
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define float @getf(<4 x float> %a, i32 signext %b) {
+; CHECK-LABEL: getf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    clrldi 3, 5, 32
+; CHECK-NEXT:    xori 3, 3, 3
+; CHECK-NEXT:    sldi 3, 3, 2
+; CHECK-NEXT:    lvsl 3, 0, 3
+; CHECK-NEXT:    vperm 2, 2, 2, 3
+; CHECK-NEXT:    xscvspdpn 1, 34
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: getf:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    rldic 3, 5, 2, 30
+; CHECK-BE-NEXT:    lvsl 3, 0, 3
+; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    xscvspdpn 1, 34
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-P7-LABEL: getf:
+; CHECK-P7:       # %bb.0: # %entry
+; CHECK-P7-NEXT:    addi 3, 1, -16
+; CHECK-P7-NEXT:    rlwinm 4, 5, 2, 28, 29
+; CHECK-P7-NEXT:    stxvw4x 34, 0, 3
+; CHECK-P7-NEXT:    lfsx 1, 3, 4
+; CHECK-P7-NEXT:    blr
 entry:
   %vecext = extractelement <4 x float> %a, i32 %b
   ret float %vecext
-; CHECK-LABEL: @getf
-; CHECK-P7-LABEL: @getf
-; CHECK-BE-LABEL: @getf
-; CHECK: xori [[TRUNCREG:[0-9]+]], 5, 3
-; CHECK: sldi [[SHIFTREG:[0-9]+]], [[TRUNCREG]], 2
-; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]]
-; CHECK: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
-; CHECK: xscvspdpn 1,
-; CHECK-P7-DAG: rlwinm [[ELEMOFFREG:[0-9]+]], 5, 2, 28, 29
-; CHECK-P7-DAG: stxvw4x 34,
-; CHECK-P7: lfsx 1, 3, [[ELEMOFFREG]]
-; CHECK-BE: sldi [[ELNOREG:[0-9]+]], 5, 2
-; CHECK-BE: lvsl [[SHMSKREG:[0-9]+]], 0, [[ELNOREG]]
-; CHECK-BE: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
-; CHECK-BE: xscvspdpn 1,
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define double @getd(<2 x double> %a, i32 signext %b) {
+; CHECK-LABEL: getd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    clrldi 4, 5, 32
+; CHECK-NEXT:    andc 3, 3, 4
+; CHECK-NEXT:    sldi 3, 3, 3
+; CHECK-NEXT:    lvsl 3, 0, 3
+; CHECK-NEXT:    vperm 2, 2, 2, 3
+; CHECK-NEXT:    xxlor 1, 34, 34
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: getd:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    clrldi 3, 5, 32
+; CHECK-BE-NEXT:    andi. 3, 3, 1
+; CHECK-BE-NEXT:    sldi 3, 3, 3
+; CHECK-BE-NEXT:    lvsl 3, 0, 3
+; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    xxlor 1, 34, 34
+; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-P7-LABEL: getd:
+; CHECK-P7:       # %bb.0: # %entry
+; CHECK-P7-NEXT:    clrldi 3, 5, 32
+; CHECK-P7-NEXT:    andi. 3, 3, 1
+; CHECK-P7-NEXT:    sldi 3, 3, 3
+; CHECK-P7-NEXT:    lvsl 3, 0, 3
+; CHECK-P7-NEXT:    vperm 2, 2, 2, 3
+; CHECK-P7-NEXT:    xxlor 1, 34, 34
+; CHECK-P7-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-P7-NEXT:    blr
 entry:
   %vecext = extractelement <2 x double> %a, i32 %b
   ret double %vecext
-; CHECK-LABEL: @getd
-; CHECK-P7-LABEL: @getd
-; CHECK-BE-LABEL: @getd
-; CHECK: li [[TRUNCREG:[0-9]+]], 1
-; CHECK: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5
-; CHECK: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 3
-; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]]
-; CHECK: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
-; FIXME: the instruction below is a redundant regclass copy, to be removed
-; CHECK: xxlor 1,
-; CHECK-P7-DAG: andi. [[ANDREG:[0-9]+]], 5, 1
-; CHECK-P7-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3
-; CHECK-P7-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]]
-; CHECK-P7-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
-; FIXME: the instruction below is a redundant regclass copy, to be removed
-; CHECK-P7: xxlor 1,
-; CHECK-BE-DAG: andi. [[ANDREG:[0-9]+]], 5, 1
-; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3
-; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]]
-; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
-; FIXME: the instruction below is a redundant regclass copy, to be removed
-; CHECK-BE: xxlor 1,
 }
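
The regenerated bodies above all begin by clearing the upper 32 bits of the index register (clrldi 3, 5, 32 on P8/P9, a masked stack offset on P7), matching the unsigned treatment of the index. A minimal IR sketch of the pattern these functions exercise, with an illustrative function name:

; The variable i32 index is interpreted as unsigned, so a value such as
; -1 selects element 4294967295: out of range for <2 x i64>, making the
; result poison, rather than acting as a sign-extended element offset.
define i64 @example_unsigned_idx(<2 x i64> %v, i32 %i) {
entry:
  %e = extractelement <2 x i64> %v, i32 %i
  ret i64 %e
}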
diff --git a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
--- a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
@@ -5,13 +5,15 @@
 define zeroext i8 @test1(<16 x i8> %a, i32 signext %index) {
 ; CHECK-LE-LABEL: test1:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    vextubrx 3, 5, 2
+; CHECK-LE-NEXT:    clrldi 3, 5, 32
+; CHECK-LE-NEXT:    vextubrx 3, 3, 2
 ; CHECK-LE-NEXT:    clrldi 3, 3, 56
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test1:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    vextublx 3, 5, 2
+; CHECK-BE-NEXT:    clrldi 3, 5, 32
+; CHECK-BE-NEXT:    vextublx 3, 3, 2
 ; CHECK-BE-NEXT:    clrldi 3, 3, 56
 ; CHECK-BE-NEXT:    blr
 
@@ -23,13 +25,15 @@
 define signext i8 @test2(<16 x i8> %a, i32 signext %index) {
 ; CHECK-LE-LABEL: test2:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    vextubrx 3, 5, 2
+; CHECK-LE-NEXT:    clrldi 3, 5, 32
+; CHECK-LE-NEXT:    vextubrx 3, 3, 2
 ; CHECK-LE-NEXT:    extsb 3, 3
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    vextublx 3, 5, 2
+; CHECK-BE-NEXT:    clrldi 3, 5, 32
+; CHECK-BE-NEXT:    vextublx 3, 3, 2
 ; CHECK-BE-NEXT:    extsb 3, 3
 ; CHECK-BE-NEXT:    blr
 
@@ -41,14 +45,16 @@
 define zeroext i16 @test3(<8 x i16> %a, i32 signext %index) {
 ; CHECK-LE-LABEL: test3:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    rlwinm 3, 5, 1, 28, 30
+; CHECK-LE-NEXT:    clrldi 3, 5, 32
+; CHECK-LE-NEXT:    rlwinm 3, 3, 1, 28, 30
 ; CHECK-LE-NEXT:    vextuhrx 3, 3, 2
 ; CHECK-LE-NEXT:    clrldi 3, 3, 48
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test3:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    rlwinm 3, 5, 1, 28, 30
+; CHECK-BE-NEXT:    clrldi 3, 5, 32
+; CHECK-BE-NEXT:    rlwinm 3, 3, 1, 28, 30
 ; CHECK-BE-NEXT:    vextuhlx 3, 3, 2
 ; CHECK-BE-NEXT:    clrldi 3, 3, 48
 ; CHECK-BE-NEXT:    blr
@@ -61,14 +67,16 @@
 define signext i16 @test4(<8 x i16> %a, i32 signext %index) {
 ; CHECK-LE-LABEL: test4:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    rlwinm 3, 5, 1, 28, 30
+; CHECK-LE-NEXT:    clrldi 3, 5, 32
+; CHECK-LE-NEXT:    rlwinm 3, 3, 1, 28, 30
 ; CHECK-LE-NEXT:    vextuhrx 3, 3, 2
 ; CHECK-LE-NEXT:    extsh 3, 3
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    rlwinm 3, 5, 1, 28, 30
+; CHECK-BE-NEXT:    clrldi 3, 5, 32
+; CHECK-BE-NEXT:    rlwinm 3, 3, 1, 28, 30
 ; CHECK-BE-NEXT:    vextuhlx 3, 3, 2
 ; CHECK-BE-NEXT:    extsh 3, 3
 ; CHECK-BE-NEXT:    blr
@@ -81,13 +89,15 @@
 define zeroext i32 @test5(<4 x i32> %a, i32 signext %index) {
 ; CHECK-LE-LABEL: test5:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    rlwinm 3, 5, 2, 28, 29
+; CHECK-LE-NEXT:    clrldi 3, 5, 32
+; CHECK-LE-NEXT:    rlwinm 3, 3, 2, 28, 29
 ; CHECK-LE-NEXT:    vextuwrx 3, 3, 2
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test5:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    rlwinm 3, 5, 2, 28, 29
+; CHECK-BE-NEXT:    clrldi 3, 5, 32
+; CHECK-BE-NEXT:    rlwinm 3, 3, 2, 28, 29
 ; CHECK-BE-NEXT:    vextuwlx 3, 3, 2
 ; CHECK-BE-NEXT:    blr
 
@@ -99,14 +109,16 @@
 define signext i32 @test6(<4 x i32> %a, i32 signext %index) {
 ; CHECK-LE-LABEL: test6:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    rlwinm 3, 5, 2, 28, 29
+; CHECK-LE-NEXT:    clrldi 3, 5, 32
+; CHECK-LE-NEXT:    rlwinm 3, 3, 2, 28, 29
 ; CHECK-LE-NEXT:    vextuwrx 3, 3, 2
 ; CHECK-LE-NEXT:    extsw 3, 3
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test6:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    rlwinm 3, 5, 2, 28, 29
+; CHECK-BE-NEXT:    clrldi 3, 5, 32
+; CHECK-BE-NEXT:    rlwinm 3, 3, 2, 28, 29
 ; CHECK-BE-NEXT:    vextuwlx 3, 3, 2
 ; CHECK-BE-NEXT:    extsw 3, 3
 ; CHECK-BE-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/vec_extract_p9_2.ll b/llvm/test/CodeGen/PowerPC/vec_extract_p9_2.ll
--- a/llvm/test/CodeGen/PowerPC/vec_extract_p9_2.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_extract_p9_2.ll
@@ -5,14 +5,16 @@
 define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, i8 zeroext %c) {
 ; CHECK-LE-LABEL: test_add1:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    vextubrx 3, 5, 2
+; CHECK-LE-NEXT:    clrldi 3, 5, 32
+; CHECK-LE-NEXT:    vextubrx 3, 3, 2
 ; CHECK-LE-NEXT:    add 3, 3, 6
 ; CHECK-LE-NEXT:    clrldi 3, 3, 56
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test_add1:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    vextublx 3, 5, 2
+; CHECK-BE-NEXT:    clrldi 3, 5, 32
+; CHECK-BE-NEXT:    vextublx 3, 3, 2
 ; CHECK-BE-NEXT:    add 3, 3, 6
 ; CHECK-BE-NEXT:    clrldi 3, 3, 56
 ; CHECK-BE-NEXT:    blr
@@ -28,14 +30,16 @@
 define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) {
 ; CHECK-LE-LABEL: test_add2:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    vextubrx 3, 5, 2
+; CHECK-LE-NEXT:    clrldi 3, 5, 32
+; CHECK-LE-NEXT:    vextubrx 3, 3, 2
 ; CHECK-LE-NEXT:    add 3, 3, 6
 ; CHECK-LE-NEXT:    extsb 3, 3
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test_add2:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    vextublx 3, 5, 2
+; CHECK-BE-NEXT:    clrldi 3, 5, 32
+; CHECK-BE-NEXT:    vextublx 3, 3, 2
 ; CHECK-BE-NEXT:    add 3, 3, 6
 ; CHECK-BE-NEXT:    extsb 3, 3
 ; CHECK-BE-NEXT:    blr
@@ -51,7 +55,8 @@
 define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) {
 ; CHECK-LE-LABEL: test_add3:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    rlwinm 3, 5, 1, 28, 30
+; CHECK-LE-NEXT:    clrldi 3, 5, 32
+; CHECK-LE-NEXT:    rlwinm 3, 3, 1, 28, 30
 ; CHECK-LE-NEXT:    vextuhrx 3, 3, 2
 ; CHECK-LE-NEXT:    add 3, 3, 6
 ; CHECK-LE-NEXT:    clrldi 3, 3, 48
@@ -59,7 +64,8 @@
 ;
 ; CHECK-BE-LABEL: test_add3:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    rlwinm 3, 5, 1, 28, 30
+; CHECK-BE-NEXT:    clrldi 3, 5, 32
+; CHECK-BE-NEXT:    rlwinm 3, 3, 1, 28, 30
 ; CHECK-BE-NEXT:    vextuhlx 3, 3, 2
 ; CHECK-BE-NEXT:    add 3, 3, 6
 ; CHECK-BE-NEXT:    clrldi 3, 3, 48
@@ -76,7 +82,8 @@
 define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) {
 ; CHECK-LE-LABEL: test_add4:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    rlwinm 3, 5, 1, 28, 30
+; CHECK-LE-NEXT:    clrldi 3, 5, 32
+; CHECK-LE-NEXT:    rlwinm 3, 3, 1, 28, 30
 ; CHECK-LE-NEXT:    vextuhrx 3, 3, 2
 ; CHECK-LE-NEXT:    add 3, 3, 6
 ; CHECK-LE-NEXT:    extsh 3, 3
@@ -84,7 +91,8 @@
 ;
 ; CHECK-BE-LABEL: test_add4:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    rlwinm 3, 5, 1, 28, 30
+; CHECK-BE-NEXT:    clrldi 3, 5, 32
+; CHECK-BE-NEXT:    rlwinm 3, 3, 1, 28, 30
 ; CHECK-BE-NEXT:    vextuhlx 3, 3, 2
 ; CHECK-BE-NEXT:    add 3, 3, 6
 ; CHECK-BE-NEXT:    extsh 3, 3
@@ -101,7 +109,8 @@
 define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) {
 ; CHECK-LE-LABEL: test_add5:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    rlwinm 3, 5, 2, 28, 29
+; CHECK-LE-NEXT:    clrldi 3, 5, 32
+; CHECK-LE-NEXT:    rlwinm 3, 3, 2, 28, 29
 ; CHECK-LE-NEXT:    vextuwrx 3, 3, 2
 ; CHECK-LE-NEXT:    add 3, 3, 6
 ; CHECK-LE-NEXT:    clrldi 3, 3, 32
@@ -109,7 +118,8 @@
 ;
 ; CHECK-BE-LABEL: test_add5:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    rlwinm 3, 5, 2, 28, 29
+; CHECK-BE-NEXT:    clrldi 3, 5, 32
+; CHECK-BE-NEXT:    rlwinm 3, 3, 2, 28, 29
 ; CHECK-BE-NEXT:    vextuwlx 3, 3, 2
 ; CHECK-BE-NEXT:    add 3, 3, 6
 ; CHECK-BE-NEXT:    clrldi 3, 3, 32
@@ -123,7 +133,8 @@
 define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) {
 ; CHECK-LE-LABEL: test_add6:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    rlwinm 3, 5, 2, 28, 29
+; CHECK-LE-NEXT:    clrldi 3, 5, 32
+; CHECK-LE-NEXT:    rlwinm 3, 3, 2, 28, 29
 ; CHECK-LE-NEXT:    vextuwrx 3, 3, 2
 ; CHECK-LE-NEXT:    add 3, 3, 6
 ; CHECK-LE-NEXT:    extsw 3, 3
@@ -131,7 +142,8 @@
 ;
 ; CHECK-BE-LABEL: test_add6:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    rlwinm 3, 5, 2, 28, 29
+; CHECK-BE-NEXT:    clrldi 3, 5, 32
+; CHECK-BE-NEXT:    rlwinm 3, 3, 2, 28, 29
 ; CHECK-BE-NEXT:    vextuwlx 3, 3, 2
 ; CHECK-BE-NEXT:    add 3, 3, 6
 ; CHECK-BE-NEXT:    extsw 3, 3
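
In the vextub*/vextuh*/vextuw* sequences above, the byte offset into the 16-byte vector register is formed with rlwinm: rlwinm 3, 3, 1, 28, 30 computes (idx << 1) & 0xE for halfwords, and rlwinm 3, 3, 2, 28, 29 computes (idx << 2) & 0xC for words; the new leading clrldi 3, 5, 32 zero-extends the 32-bit index before that scaling. A sketch of the extract-then-add pattern covered here, assuming an illustrative function name:

; Extract a word at a variable unsigned index, then add a scalar to it.
define zeroext i32 @example_extract_add(<4 x i32> %a, i32 signext %index, i32 zeroext %c) {
entry:
  %vecext = extractelement <4 x i32> %a, i32 %index
  %add = add i32 %vecext, %c
  ret i32 %add
}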
diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
--- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
@@ -353,16 +353,14 @@
 ; CHECK-LABEL: testFloat1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xscvdpspn v3, f1
-; CHECK-NEXT:    extsw r3, r6
-; CHECK-NEXT:    slwi r3, r3, 2
+; CHECK-NEXT:    slwi r3, r6, 2
 ; CHECK-NEXT:    vinswvrx v2, r3, v3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testFloat1:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xscvdpspn v3, f1
-; CHECK-BE-NEXT:    extsw r3, r6
-; CHECK-BE-NEXT:    slwi r3, r3, 2
+; CHECK-BE-NEXT:    slwi r3, r6, 2
 ; CHECK-BE-NEXT:    vinswvlx v2, r3, v3
 ; CHECK-BE-NEXT:    blr
 ;
@@ -392,74 +390,54 @@
 ; CHECK-LABEL: testFloat2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lwz r3, 0(r5)
-; CHECK-NEXT:    extsw r4, r6
-; CHECK-NEXT:    slwi r4, r4, 2
+; CHECK-NEXT:    slwi r4, r6, 2
 ; CHECK-NEXT:    vinswrx v2, r4, r3
 ; CHECK-NEXT:    lwz r3, 1(r5)
-; CHECK-NEXT:    extsw r4, r7
-; CHECK-NEXT:    slwi r4, r4, 2
+; CHECK-NEXT:    slwi r4, r7, 2
 ; CHECK-NEXT:    vinswrx v2, r4, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testFloat2:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lwz r3, 0(r5)
-; CHECK-BE-NEXT:    extsw r4, r6
-; CHECK-BE-NEXT:    slwi r4, r4, 2
+; CHECK-BE-NEXT:    slwi r4, r6, 2
 ; CHECK-BE-NEXT:    vinswlx v2, r4, r3
 ; CHECK-BE-NEXT:    lwz r3, 1(r5)
-; CHECK-BE-NEXT:    extsw r4, r7
-; CHECK-BE-NEXT:    slwi r4, r4, 2
+; CHECK-BE-NEXT:    slwi r4, r7, 2
 ; CHECK-BE-NEXT:    vinswlx v2, r4, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testFloat2:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lwz r3, 0(r5)
 ; CHECK-P9-NEXT:    rlwinm r4, r6, 2, 28, 29
-; CHECK-P9-NEXT:    addi r6, r1, -16
+; CHECK-P9-NEXT:    lwz r6, 0(r5)
+; CHECK-P9-NEXT:    rlwinm r3, r7, 2, 28, 29
+; CHECK-P9-NEXT:    addi r7, r1, -16
 ; CHECK-P9-NEXT:    stxv v2, -16(r1)
-; CHECK-P9-NEXT:    stwx r3, r6, r4
-; CHECK-P9-NEXT:    rlwinm r4, r7, 2, 28, 29
+; CHECK-P9-NEXT:    stwx r6, r7, r4
 ; CHECK-P9-NEXT:    lxv vs0, -16(r1)
-; CHECK-P9-NEXT:    lwz r3, 1(r5)
+; CHECK-P9-NEXT:    lwz r4, 1(r5)
 ; CHECK-P9-NEXT:    addi r5, r1, -32
 ; CHECK-P9-NEXT:    stxv vs0, -32(r1)
-; CHECK-P9-NEXT:    stwx r3, r5, r4
+; CHECK-P9-NEXT:    stwx r4, r5, r3
 ; CHECK-P9-NEXT:    lxv v2, -32(r1)
 ; CHECK-P9-NEXT:    blr
 ;
-; AIX-P8-64-LABEL: testFloat2:
-; AIX-P8-64:       # %bb.0: # %entry
-; AIX-P8-64-NEXT:    lwz r7, 0(r3)
-; AIX-P8-64-NEXT:    addi r6, r1, -32
-; AIX-P8-64-NEXT:    rlwinm r4, r4, 2, 28, 29
-; AIX-P8-64-NEXT:    rlwinm r5, r5, 2, 28, 29
-; AIX-P8-64-NEXT:    stxvw4x v2, 0, r6
-; AIX-P8-64-NEXT:    stwx r7, r6, r4
-; AIX-P8-64-NEXT:    addi r4, r1, -16
-; AIX-P8-64-NEXT:    lxvw4x vs0, 0, r6
-; AIX-P8-64-NEXT:    lwz r3, 1(r3)
-; AIX-P8-64-NEXT:    stxvw4x vs0, 0, r4
-; AIX-P8-64-NEXT:    stwx r3, r4, r5
-; AIX-P8-64-NEXT:    lxvw4x v2, 0, r4
-; AIX-P8-64-NEXT:    blr
-;
-; AIX-P8-32-LABEL: testFloat2:
-; AIX-P8-32:       # %bb.0: # %entry
-; AIX-P8-32-NEXT:    lwz r7, 0(r3)
-; AIX-P8-32-NEXT:    addi r6, r1, -32
-; AIX-P8-32-NEXT:    rlwinm r4, r4, 2, 28, 29
-; AIX-P8-32-NEXT:    stxvw4x v2, 0, r6
-; AIX-P8-32-NEXT:    stwx r7, r6, r4
-; AIX-P8-32-NEXT:    rlwinm r4, r5, 2, 28, 29
-; AIX-P8-32-NEXT:    addi r5, r1, -16
-; AIX-P8-32-NEXT:    lxvw4x vs0, 0, r6
-; AIX-P8-32-NEXT:    lwz r3, 1(r3)
-; AIX-P8-32-NEXT:    stxvw4x vs0, 0, r5
-; AIX-P8-32-NEXT:    stwx r3, r5, r4
-; AIX-P8-32-NEXT:    lxvw4x v2, 0, r5
-; AIX-P8-32-NEXT:    blr
+; AIX-P8-LABEL: testFloat2:
+; AIX-P8:       # %bb.0: # %entry
+; AIX-P8-NEXT:    lwz r7, 0(r3)
+; AIX-P8-NEXT:    addi r6, r1, -32
+; AIX-P8-NEXT:    rlwinm r4, r4, 2, 28, 29
+; AIX-P8-NEXT:    stxvw4x v2, 0, r6
+; AIX-P8-NEXT:    stwx r7, r6, r4
+; AIX-P8-NEXT:    rlwinm r4, r5, 2, 28, 29
+; AIX-P8-NEXT:    addi r5, r1, -16
+; AIX-P8-NEXT:    lxvw4x vs0, 0, r6
+; AIX-P8-NEXT:    lwz r3, 1(r3)
+; AIX-P8-NEXT:    stxvw4x vs0, 0, r5
+; AIX-P8-NEXT:    stwx r3, r5, r4
+; AIX-P8-NEXT:    lxvw4x v2, 0, r5
+; AIX-P8-NEXT:    blr
 entry:
   %0 = bitcast i8* %b to float*
   %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1
@@ -475,13 +453,11 @@
 ; CHECK-LABEL: testFloat3:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    plwz r3, 65536(r5), 0
-; CHECK-NEXT:    extsw r4, r6
-; CHECK-NEXT:    slwi r4, r4, 2
+; CHECK-NEXT:    slwi r4, r6, 2
 ; CHECK-NEXT:    vinswrx v2, r4, r3
 ; CHECK-NEXT:    li r3, 1
-; CHECK-NEXT:    extsw r4, r7
+; CHECK-NEXT:    slwi r4, r7, 2
 ; CHECK-NEXT:    rldic r3, r3, 36, 27
-; CHECK-NEXT:    slwi r4, r4, 2
 ; CHECK-NEXT:    lwzx r3, r5, r3
 ; CHECK-NEXT:    vinswrx v2, r4, r3
 ; CHECK-NEXT:    blr
@@ -489,42 +465,39 @@
 ; CHECK-BE-LABEL: testFloat3:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    plwz r3, 65536(r5), 0
-; CHECK-BE-NEXT:    extsw r4, r6
-; CHECK-BE-NEXT:    slwi r4, r4, 2
+; CHECK-BE-NEXT:    slwi r4, r6, 2
 ; CHECK-BE-NEXT:    vinswlx v2, r4, r3
 ; CHECK-BE-NEXT:    li r3, 1
-; CHECK-BE-NEXT:    extsw r4, r7
+; CHECK-BE-NEXT:    slwi r4, r7, 2
 ; CHECK-BE-NEXT:    rldic r3, r3, 36, 27
-; CHECK-BE-NEXT:    slwi r4, r4, 2
 ; CHECK-BE-NEXT:    lwzx r3, r5, r3
 ; CHECK-BE-NEXT:    vinswlx v2, r4, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testFloat3:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r3, 1
 ; CHECK-P9-NEXT:    rlwinm r4, r6, 2, 28, 29
-; CHECK-P9-NEXT:    addi r6, r1, -16
-; CHECK-P9-NEXT:    lwzx r3, r5, r3
+; CHECK-P9-NEXT:    lis r6, 1
+; CHECK-P9-NEXT:    rlwinm r3, r7, 2, 28, 29
+; CHECK-P9-NEXT:    addi r7, r1, -16
+; CHECK-P9-NEXT:    lwzx r6, r5, r6
 ; CHECK-P9-NEXT:    stxv v2, -16(r1)
-; CHECK-P9-NEXT:    stwx r3, r6, r4
-; CHECK-P9-NEXT:    li r3, 1
-; CHECK-P9-NEXT:    rlwinm r4, r7, 2, 28, 29
+; CHECK-P9-NEXT:    stwx r6, r7, r4
+; CHECK-P9-NEXT:    li r4, 1
 ; CHECK-P9-NEXT:    lxv vs0, -16(r1)
-; CHECK-P9-NEXT:    rldic r3, r3, 36, 27
-; CHECK-P9-NEXT:    lwzx r3, r5, r3
+; CHECK-P9-NEXT:    rldic r4, r4, 36, 27
+; CHECK-P9-NEXT:    lwzx r4, r5, r4
 ; CHECK-P9-NEXT:    addi r5, r1, -32
 ; CHECK-P9-NEXT:    stxv vs0, -32(r1)
-; CHECK-P9-NEXT:    stwx r3, r5, r4
+; CHECK-P9-NEXT:    stwx r4, r5, r3
 ; CHECK-P9-NEXT:    lxv v2, -32(r1)
 ; CHECK-P9-NEXT:    blr
 ;
 ; AIX-P8-64-LABEL: testFloat3:
 ; AIX-P8-64:       # %bb.0: # %entry
 ; AIX-P8-64-NEXT:    lis r6, 1
-; AIX-P8-64-NEXT:    addi r7, r1, -32
 ; AIX-P8-64-NEXT:    rlwinm r4, r4, 2, 28, 29
-; AIX-P8-64-NEXT:    rlwinm r5, r5, 2, 28, 29
+; AIX-P8-64-NEXT:    addi r7, r1, -32
 ; AIX-P8-64-NEXT:    lwzx r6, r3, r6
 ; AIX-P8-64-NEXT:    stxvw4x v2, 0, r7
 ; AIX-P8-64-NEXT:    stwx r6, r7, r4
@@ -532,10 +505,11 @@
 ; AIX-P8-64-NEXT:    lxvw4x vs0, 0, r7
 ; AIX-P8-64-NEXT:    rldic r4, r4, 36, 27
 ; AIX-P8-64-NEXT:    lwzx r3, r3, r4
-; AIX-P8-64-NEXT:    addi r4, r1, -16
-; AIX-P8-64-NEXT:    stxvw4x vs0, 0, r4
-; AIX-P8-64-NEXT:    stwx r3, r4, r5
-; AIX-P8-64-NEXT:    lxvw4x v2, 0, r4
+; AIX-P8-64-NEXT:    rlwinm r4, r5, 2, 28, 29
+; AIX-P8-64-NEXT:    addi r5, r1, -16
+; AIX-P8-64-NEXT:    stxvw4x vs0, 0, r5
+; AIX-P8-64-NEXT:    stwx r3, r5, r4
+; AIX-P8-64-NEXT:    lxvw4x v2, 0, r5
 ; AIX-P8-64-NEXT:    blr
 ;
 ; AIX-P8-32-LABEL: testFloat3:
@@ -758,17 +732,15 @@
 define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) {
 ; CHECK-LABEL: testDouble1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    extsw r4, r6
 ; CHECK-NEXT:    mffprd r3, f1
-; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
+; CHECK-NEXT:    rlwinm r4, r6, 3, 0, 28
 ; CHECK-NEXT:    vinsdrx v2, r4, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testDouble1:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    extsw r4, r6
 ; CHECK-BE-NEXT:    mffprd r3, f1
-; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
+; CHECK-BE-NEXT:    rlwinm r4, r6, 3, 0, 28
 ; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
 ; CHECK-BE-NEXT:    blr
 ;
@@ -807,41 +779,37 @@
 ; CHECK-LABEL: testDouble2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    ld r3, 0(r5)
-; CHECK-NEXT:    extsw r4, r6
-; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
+; CHECK-NEXT:    rlwinm r4, r6, 3, 0, 28
 ; CHECK-NEXT:    vinsdrx v2, r4, r3
 ; CHECK-NEXT:    pld r3, 1(r5), 0
-; CHECK-NEXT:    extsw r4, r7
-; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
+; CHECK-NEXT:    rlwinm r4, r7, 3, 0, 28
 ; CHECK-NEXT:    vinsdrx v2, r4, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testDouble2:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    ld r3, 0(r5)
-; CHECK-BE-NEXT:    extsw r4, r6
-; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
+; CHECK-BE-NEXT:    rlwinm r4, r6, 3, 0, 28
 ; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
 ; CHECK-BE-NEXT:    pld r3, 1(r5), 0
-; CHECK-BE-NEXT:    extsw r4, r7
-; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
+; CHECK-BE-NEXT:    rlwinm r4, r7, 3, 0, 28
 ; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testDouble2:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    ld r3, 0(r5)
 ; CHECK-P9-NEXT:    rlwinm r4, r6, 3, 28, 28
-; CHECK-P9-NEXT:    addi r6, r1, -32
+; CHECK-P9-NEXT:    ld r6, 0(r5)
+; CHECK-P9-NEXT:    rlwinm r3, r7, 3, 28, 28
+; CHECK-P9-NEXT:    addi r7, r1, -32
 ; CHECK-P9-NEXT:    stxv v2, -32(r1)
-; CHECK-P9-NEXT:    stdx r3, r6, r4
-; CHECK-P9-NEXT:    li r3, 1
-; CHECK-P9-NEXT:    rlwinm r4, r7, 3, 28, 28
+; CHECK-P9-NEXT:    stdx r6, r7, r4
+; CHECK-P9-NEXT:    li r4, 1
 ; CHECK-P9-NEXT:    lxv vs0, -32(r1)
-; CHECK-P9-NEXT:    ldx r3, r5, r3
+; CHECK-P9-NEXT:    ldx r4, r5, r4
 ; CHECK-P9-NEXT:    addi r5, r1, -16
 ; CHECK-P9-NEXT:    stxv vs0, -16(r1)
-; CHECK-P9-NEXT:    stdx r3, r5, r4
+; CHECK-P9-NEXT:    stdx r4, r5, r3
 ; CHECK-P9-NEXT:    lxv v2, -16(r1)
 ; CHECK-P9-NEXT:    blr
 ;
@@ -850,16 +818,16 @@
 ; AIX-P8-64-NEXT:    ld r7, 0(r3)
 ; AIX-P8-64-NEXT:    addi r6, r1, -32
 ; AIX-P8-64-NEXT:    rlwinm r4, r4, 3, 28, 28
-; AIX-P8-64-NEXT:    rlwinm r5, r5, 3, 28, 28
 ; AIX-P8-64-NEXT:    stxvd2x v2, 0, r6
 ; AIX-P8-64-NEXT:    stdx r7, r6, r4
 ; AIX-P8-64-NEXT:    li r4, 1
 ; AIX-P8-64-NEXT:    lxvd2x vs0, 0, r6
 ; AIX-P8-64-NEXT:    ldx r3, r3, r4
-; AIX-P8-64-NEXT:    addi r4, r1, -16
-; AIX-P8-64-NEXT:    stxvd2x vs0, 0, r4
-; AIX-P8-64-NEXT:    stdx r3, r4, r5
-; AIX-P8-64-NEXT:    lxvd2x v2, 0, r4
+; AIX-P8-64-NEXT:    rlwinm r4, r5, 3, 28, 28
+; AIX-P8-64-NEXT:    addi r5, r1, -16
+; AIX-P8-64-NEXT:    stxvd2x vs0, 0, r5
+; AIX-P8-64-NEXT:    stdx r3, r5, r4
+; AIX-P8-64-NEXT:    lxvd2x v2, 0, r5
 ; AIX-P8-64-NEXT:    blr
 ;
 ; AIX-P8-32-LABEL: testDouble2:
@@ -892,13 +860,11 @@
 ; CHECK-LABEL: testDouble3:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, 65536(r5), 0
-; CHECK-NEXT:    extsw r4, r6
-; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
+; CHECK-NEXT:    rlwinm r4, r6, 3, 0, 28
 ; CHECK-NEXT:    vinsdrx v2, r4, r3
 ; CHECK-NEXT:    li r3, 1
-; CHECK-NEXT:    extsw r4, r7
+; CHECK-NEXT:    rlwinm r4, r7, 3, 0, 28
 ; CHECK-NEXT:    rldic r3, r3, 36, 27
-; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
 ; CHECK-NEXT:    ldx r3, r5, r3
 ; CHECK-NEXT:    vinsdrx v2, r4, r3
 ; CHECK-NEXT:    blr
@@ -906,53 +872,51 @@
 ; CHECK-BE-LABEL: testDouble3:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    pld r3, 65536(r5), 0
-; CHECK-BE-NEXT:    extsw r4, r6
-; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
+; CHECK-BE-NEXT:    rlwinm r4, r6, 3, 0, 28
 ; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
 ; CHECK-BE-NEXT:    li r3, 1
-; CHECK-BE-NEXT:    extsw r4, r7
+; CHECK-BE-NEXT:    rlwinm r4, r7, 3, 0, 28
 ; CHECK-BE-NEXT:    rldic r3, r3, 36, 27
-; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
 ; CHECK-BE-NEXT:    ldx r3, r5, r3
 ; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testDouble3:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r3, 1
 ; CHECK-P9-NEXT:    rlwinm r4, r6, 3, 28, 28
-; CHECK-P9-NEXT:    addi r6, r1, -32
-; CHECK-P9-NEXT:    ldx r3, r5, r3
+; CHECK-P9-NEXT:    lis r6, 1
+; CHECK-P9-NEXT:    rlwinm r3, r7, 3, 28, 28
+; CHECK-P9-NEXT:    addi r7, r1, -32
+; CHECK-P9-NEXT:    ldx r6, r5, r6
 ; CHECK-P9-NEXT:    stxv v2, -32(r1)
-; CHECK-P9-NEXT:    stdx r3, r6, r4
-; CHECK-P9-NEXT:    li r3, 1
-; CHECK-P9-NEXT:    rlwinm r4, r7, 3, 28, 28
+; CHECK-P9-NEXT:    stdx r6, r7, r4
+; CHECK-P9-NEXT:    li r4, 1
 ; CHECK-P9-NEXT:    lxv vs0, -32(r1)
-; CHECK-P9-NEXT:    rldic r3, r3, 36, 27
-; CHECK-P9-NEXT:    ldx r3, r5, r3
+; CHECK-P9-NEXT:    rldic r4, r4, 36, 27
+; CHECK-P9-NEXT:    ldx r4, r5, r4
 ; CHECK-P9-NEXT:    addi r5, r1, -16
 ; CHECK-P9-NEXT:    stxv vs0, -16(r1)
-; CHECK-P9-NEXT:    stdx r3, r5, r4
+; CHECK-P9-NEXT:    stdx r4, r5, r3
 ; CHECK-P9-NEXT:    lxv v2, -16(r1)
 ; CHECK-P9-NEXT:    blr
 ;
 ; AIX-P8-64-LABEL: testDouble3:
 ; AIX-P8-64:       # %bb.0: # %entry
 ; AIX-P8-64-NEXT:    lis r6, 1
-; AIX-P8-64-NEXT:    addi r7, r1, -32
 ; AIX-P8-64-NEXT:    rlwinm r4, r4, 3, 28, 28
+; AIX-P8-64-NEXT:    addi r7, r1, -32
 ; AIX-P8-64-NEXT:    li r8, 1
-; AIX-P8-64-NEXT:    rlwinm r5, r5, 3, 28, 28
 ; AIX-P8-64-NEXT:    ldx r6, r3, r6
 ; AIX-P8-64-NEXT:    stxvd2x v2, 0, r7
 ; AIX-P8-64-NEXT:    stdx r6, r7, r4
 ; AIX-P8-64-NEXT:    rldic r4, r8, 36, 27
 ; AIX-P8-64-NEXT:    lxvd2x vs0, 0, r7
 ; AIX-P8-64-NEXT:    ldx r3, r3, r4
-; AIX-P8-64-NEXT:    addi r4, r1, -16
-; AIX-P8-64-NEXT:    stxvd2x vs0, 0, r4
-; AIX-P8-64-NEXT:    stdx r3, r4, r5
-; AIX-P8-64-NEXT:    lxvd2x v2, 0, r4
+; AIX-P8-64-NEXT:    rlwinm r4, r5, 3, 28, 28
+; AIX-P8-64-NEXT:    addi r5, r1, -16
+; AIX-P8-64-NEXT:    stxvd2x vs0, 0, r5
+; AIX-P8-64-NEXT:    stdx r3, r5, r4
+; AIX-P8-64-NEXT:    lxvd2x v2, 0, r5
 ; AIX-P8-64-NEXT:    blr
 ;
 ; AIX-P8-32-LABEL: testDouble3:
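
For the insertelement tests above, the index parameters visible here are declared i32 zeroext (for example testDouble1), so the upper 32 bits of the register are already zero; with the index treated as unsigned, the old extsw before the slwi/rlwinm scaling is redundant and drops out. A sketch of the variable-index insert, with an illustrative name:

; Insert a double at a variable unsigned index; a zeroext parameter needs
; no further extension before being scaled to a byte offset.
define <2 x double> @example_insert(<2 x double> %a, double %b, i32 zeroext %idx) {
entry:
  %ins = insertelement <2 x double> %a, double %b, i32 %idx
  ret <2 x double> %ins
}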
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll
@@ -26,6 +26,8 @@
 define half @extractelt_nxv1f16_idx(<vscale x 1 x half> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv1f16_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -58,6 +60,8 @@
 define half @extractelt_nxv2f16_idx(<vscale x 2 x half> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv2f16_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -90,6 +94,8 @@
 define half @extractelt_nxv4f16_idx(<vscale x 4 x half> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv4f16_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -122,6 +128,8 @@
 define half @extractelt_nxv8f16_idx(<vscale x 8 x half> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv8f16_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -154,6 +162,8 @@
 define half @extractelt_nxv16f16_idx(<vscale x 16 x half> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv16f16_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m4, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -186,6 +196,8 @@
 define half @extractelt_nxv32f16_idx(<vscale x 32 x half> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv32f16_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m8, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -218,6 +230,8 @@
 define float @extractelt_nxv1f32_idx(<vscale x 1 x float> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv1f32_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -250,6 +264,8 @@
 define float @extractelt_nxv2f32_idx(<vscale x 2 x float> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv2f32_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -282,6 +298,8 @@
 define float @extractelt_nxv4f32_idx(<vscale x 4 x float> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv4f32_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -314,6 +332,8 @@
 define float @extractelt_nxv8f32_idx(<vscale x 8 x float> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv8f32_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m4, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -346,6 +366,8 @@
 define float @extractelt_nxv16f32_idx(<vscale x 16 x float> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv16f32_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m8, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -378,6 +400,8 @@
 define double @extractelt_nxv1f64_idx(<vscale x 1 x double> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv1f64_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -410,6 +434,8 @@
 define double @extractelt_nxv2f64_idx(<vscale x 2 x double> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv2f64_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -442,6 +468,8 @@
 define double @extractelt_nxv4f64_idx(<vscale x 4 x double> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv4f64_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m4, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -474,6 +502,8 @@
 define double @extractelt_nxv8f64_idx(<vscale x 8 x double> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv8f64_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m8, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
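
On RV64 the i32 %idx arrives sign-extended (per its signext attribute), so these scalable-vector tests now clear the upper 32 bits with the base-ISA slli a0, a0, 32 / srli a0, a0, 32 pair before vslidedown.vx. A sketch of the pattern, assuming an illustrative function name:

; Extract from a scalable vector at a variable unsigned i32 index.
define float @example_nxv2f32_idx(<vscale x 2 x float> %v, i32 signext %idx) {
  %e = extractelement <vscale x 2 x float> %v, i32 %idx
  ret float %e
}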
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
@@ -26,6 +26,8 @@
 define signext i8 @extractelt_nxv1i8_idx(<vscale x 1 x i8> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv1i8_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -58,6 +60,8 @@
 define signext i8 @extractelt_nxv2i8_idx(<vscale x 2 x i8> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv2i8_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf4, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -90,6 +94,8 @@
 define signext i8 @extractelt_nxv4i8_idx(<vscale x 4 x i8> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv4i8_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf2, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -122,6 +128,8 @@
 define signext i8 @extractelt_nxv8i8_idx(<vscale x 8 x i8> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv8i8_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -154,6 +162,8 @@
 define signext i8 @extractelt_nxv16i8_idx(<vscale x 16 x i8> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv16i8_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e8, m2, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -186,6 +196,8 @@
 define signext i8 @extractelt_nxv32i8_idx(<vscale x 32 x i8> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv32i8_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e8, m4, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -218,6 +230,8 @@
 define signext i8 @extractelt_nxv64i8_idx(<vscale x 64 x i8> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv64i8_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e8, m8, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -250,6 +264,8 @@
 define signext i16 @extractelt_nxv1i16_idx(<vscale x 1 x i16> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv1i16_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -282,6 +298,8 @@
 define signext i16 @extractelt_nxv2i16_idx(<vscale x 2 x i16> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv2i16_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -314,6 +332,8 @@
 define signext i16 @extractelt_nxv4i16_idx(<vscale x 4 x i16> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv4i16_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -346,6 +366,8 @@
 define signext i16 @extractelt_nxv8i16_idx(<vscale x 8 x i16> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv8i16_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -378,6 +400,8 @@
 define signext i16 @extractelt_nxv16i16_idx(<vscale x 16 x i16> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv16i16_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m4, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -410,6 +434,8 @@
 define signext i16 @extractelt_nxv32i16_idx(<vscale x 32 x i16> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv32i16_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m8, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -442,6 +468,8 @@
 define signext i32 @extractelt_nxv1i32_idx(<vscale x 1 x i32> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv1i32_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -474,6 +502,8 @@
 define signext i32 @extractelt_nxv2i32_idx(<vscale x 2 x i32> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv2i32_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -506,6 +536,8 @@
 define signext i32 @extractelt_nxv4i32_idx(<vscale x 4 x i32> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv4i32_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -538,6 +570,8 @@
 define signext i32 @extractelt_nxv8i32_idx(<vscale x 8 x i32> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv8i32_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m4, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -570,6 +604,8 @@
 define signext i32 @extractelt_nxv16i32_idx(<vscale x 16 x i32> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv16i32_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m8, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -602,6 +638,8 @@
 define i64 @extractelt_nxv1i64_idx(<vscale x 1 x i64> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv1i64_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -634,6 +672,8 @@
 define i64 @extractelt_nxv2i64_idx(<vscale x 2 x i64> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv2i64_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -666,6 +706,8 @@
 define i64 @extractelt_nxv4i64_idx(<vscale x 4 x i64> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv4i64_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m4, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -698,6 +740,8 @@
 define i64 @extractelt_nxv8i64_idx(<vscale x 8 x i64> %v, i32 signext %idx) {
 ; CHECK-LABEL: extractelt_nxv8i64_idx:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m8, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a0
 ; CHECK-NEXT:    vmv.x.s a0, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -248,43 +248,77 @@
 }
 
 define i8 @extractelt_v16i8_idx(<16 x i8>* %x, i32 signext %idx) nounwind {
-; CHECK-LABEL: extractelt_v16i8_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vx v8, v8, a1
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: extractelt_v16i8_idx:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; RV32-NEXT:    vle8.v v8, (a0)
+; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, mu
+; RV32-NEXT:    vslidedown.vx v8, v8, a1
+; RV32-NEXT:    vmv.x.s a0, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: extractelt_v16i8_idx:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; RV64-NEXT:    vle8.v v8, (a0)
+; RV64-NEXT:    slli a0, a1, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, mu
+; RV64-NEXT:    vslidedown.vx v8, v8, a0
+; RV64-NEXT:    vmv.x.s a0, v8
+; RV64-NEXT:    ret
   %a = load <16 x i8>, <16 x i8>* %x
   %b = extractelement <16 x i8> %a, i32 %idx
   ret i8 %b
 }
 
 define i16 @extractelt_v8i16_idx(<8 x i16>* %x, i32 signext %idx) nounwind {
-; CHECK-LABEL: extractelt_v8i16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vx v8, v8, a1
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: extractelt_v8i16_idx:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; RV32-NEXT:    vle16.v v8, (a0)
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
+; RV32-NEXT:    vslidedown.vx v8, v8, a1
+; RV32-NEXT:    vmv.x.s a0, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: extractelt_v8i16_idx:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; RV64-NEXT:    vle16.v v8, (a0)
+; RV64-NEXT:    slli a0, a1, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
+; RV64-NEXT:    vslidedown.vx v8, v8, a0
+; RV64-NEXT:    vmv.x.s a0, v8
+; RV64-NEXT:    ret
   %a = load <8 x i16>, <8 x i16>* %x
   %b = extractelement <8 x i16> %a, i32 %idx
   ret i16 %b
 }
 
 define i32 @extractelt_v4i32_idx(<4 x i32>* %x, i32 signext %idx) nounwind {
-; CHECK-LABEL: extractelt_v4i32_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vx v8, v8, a1
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: extractelt_v4i32_idx:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
+; RV32-NEXT:    vslidedown.vx v8, v8, a1
+; RV32-NEXT:    vmv.x.s a0, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: extractelt_v4i32_idx:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV64-NEXT:    vle32.v v8, (a0)
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    slli a0, a1, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
+; RV64-NEXT:    vslidedown.vx v8, v8, a0
+; RV64-NEXT:    vmv.x.s a0, v8
+; RV64-NEXT:    ret
   %a = load <4 x i32>, <4 x i32>* %x
   %b = add <4 x i32> %a, %a
   %c = extractelement <4 x i32> %b, i32 %idx
@@ -310,8 +344,10 @@
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; RV64-NEXT:    vle64.v v8, (a0)
 ; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    slli a0, a1, 32
+; RV64-NEXT:    srli a0, a0, 32
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV64-NEXT:    vslidedown.vx v8, v8, a1
+; RV64-NEXT:    vslidedown.vx v8, v8, a0
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    ret
   %a = load <2 x i64>, <2 x i64>* %x
@@ -321,15 +357,27 @@
 }
 
 define half @extractelt_v8f16_idx(<8 x half>* %x, i32 signext %idx) nounwind {
-; CHECK-LABEL: extractelt_v8f16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vx v8, v8, a1
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: extractelt_v8f16_idx:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; RV32-NEXT:    vle16.v v8, (a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
+; RV32-NEXT:    vslidedown.vx v8, v8, a1
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: extractelt_v8f16_idx:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; RV64-NEXT:    vle16.v v8, (a0)
+; RV64-NEXT:    vfadd.vv v8, v8, v8
+; RV64-NEXT:    slli a0, a1, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
+; RV64-NEXT:    vslidedown.vx v8, v8, a0
+; RV64-NEXT:    vfmv.f.s fa0, v8
+; RV64-NEXT:    ret
   %a = load <8 x half>, <8 x half>* %x
   %b = fadd <8 x half> %a, %a
   %c = extractelement <8 x half> %b, i32 %idx
@@ -337,15 +385,27 @@
 }
 
 define float @extractelt_v4f32_idx(<4 x float>* %x, i32 signext %idx) nounwind {
-; CHECK-LABEL: extractelt_v4f32_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vx v8, v8, a1
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: extractelt_v4f32_idx:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
+; RV32-NEXT:    vslidedown.vx v8, v8, a1
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: extractelt_v4f32_idx:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV64-NEXT:    vle32.v v8, (a0)
+; RV64-NEXT:    vfadd.vv v8, v8, v8
+; RV64-NEXT:    slli a0, a1, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
+; RV64-NEXT:    vslidedown.vx v8, v8, a0
+; RV64-NEXT:    vfmv.f.s fa0, v8
+; RV64-NEXT:    ret
   %a = load <4 x float>, <4 x float>* %x
   %b = fadd <4 x float> %a, %a
   %c = extractelement <4 x float> %b, i32 %idx
@@ -353,15 +413,27 @@
 }
 
 define double @extractelt_v2f64_idx(<2 x double>* %x, i32 signext %idx) nounwind {
-; CHECK-LABEL: extractelt_v2f64_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vx v8, v8, a1
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: extractelt_v2f64_idx:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
+; RV32-NEXT:    vslidedown.vx v8, v8, a1
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: extractelt_v2f64_idx:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    vfadd.vv v8, v8, v8
+; RV64-NEXT:    slli a0, a1, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
+; RV64-NEXT:    vslidedown.vx v8, v8, a0
+; RV64-NEXT:    vfmv.f.s fa0, v8
+; RV64-NEXT:    ret
   %a = load <2 x double>, <2 x double>* %x
   %b = fadd <2 x double> %a, %a
   %c = extractelement <2 x double> %b, i32 %idx
@@ -369,44 +441,79 @@
 }
 
 define i8 @extractelt_v32i8_idx(<32 x i8>* %x, i32 signext %idx) nounwind {
-; CHECK-LABEL: extractelt_v32i8_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a2, 32
-; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
-; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e8, m2, ta, mu
-; CHECK-NEXT:    vslidedown.vx v8, v8, a1
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: extractelt_v32i8_idx:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
+; RV32-NEXT:    vle8.v v8, (a0)
+; RV32-NEXT:    vsetivli zero, 1, e8, m2, ta, mu
+; RV32-NEXT:    vslidedown.vx v8, v8, a1
+; RV32-NEXT:    vmv.x.s a0, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: extractelt_v32i8_idx:
+; RV64:       # %bb.0:
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
+; RV64-NEXT:    vle8.v v8, (a0)
+; RV64-NEXT:    slli a0, a1, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetivli zero, 1, e8, m2, ta, mu
+; RV64-NEXT:    vslidedown.vx v8, v8, a0
+; RV64-NEXT:    vmv.x.s a0, v8
+; RV64-NEXT:    ret
   %a = load <32 x i8>, <32 x i8>* %x
   %b = extractelement <32 x i8> %a, i32 %idx
   ret i8 %b
 }
 
 define i16 @extractelt_v16i16_idx(<16 x i16>* %x, i32 signext %idx) nounwind {
-; CHECK-LABEL: extractelt_v16i16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
-; CHECK-NEXT:    vslidedown.vx v8, v8, a1
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: extractelt_v16i16_idx:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
+; RV32-NEXT:    vle16.v v8, (a0)
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
+; RV32-NEXT:    vslidedown.vx v8, v8, a1
+; RV32-NEXT:    vmv.x.s a0, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: extractelt_v16i16_idx:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
+; RV64-NEXT:    vle16.v v8, (a0)
+; RV64-NEXT:    slli a0, a1, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
+; RV64-NEXT:    vslidedown.vx v8, v8, a0
+; RV64-NEXT:    vmv.x.s a0, v8
+; RV64-NEXT:    ret
   %a = load <16 x i16>, <16 x i16>* %x
   %b = extractelement <16 x i16> %a, i32 %idx
   ret i16 %b
 }
 
 define i32 @extractelt_v8i32_idx(<8 x i32>* %x, i32 signext %idx) nounwind {
-; CHECK-LABEL: extractelt_v8i32_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
-; CHECK-NEXT:    vslidedown.vx v8, v8, a1
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: extractelt_v8i32_idx:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
+; RV32-NEXT:    vslidedown.vx v8, v8, a1
+; RV32-NEXT:    vmv.x.s a0, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: extractelt_v8i32_idx:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
+; RV64-NEXT:    vle32.v v8, (a0)
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    slli a0, a1, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
+; RV64-NEXT:    vslidedown.vx v8, v8, a0
+; RV64-NEXT:    vmv.x.s a0, v8
+; RV64-NEXT:    ret
   %a = load <8 x i32>, <8 x i32>* %x
   %b = add <8 x i32> %a, %a
   %c = extractelement <8 x i32> %b, i32 %idx
@@ -432,8 +539,10 @@
 ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; RV64-NEXT:    vle64.v v8, (a0)
 ; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    slli a0, a1, 32
+; RV64-NEXT:    srli a0, a0, 32
 ; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
-; RV64-NEXT:    vslidedown.vx v8, v8, a1
+; RV64-NEXT:    vslidedown.vx v8, v8, a0
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    ret
   %a = load <4 x i64>, <4 x i64>* %x
@@ -443,15 +552,27 @@
 }
 
 define half @extractelt_v16f16_idx(<16 x half>* %x, i32 signext %idx) nounwind {
-; CHECK-LABEL: extractelt_v16f16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8
-; CHECK-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
-; CHECK-NEXT:    vslidedown.vx v8, v8, a1
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: extractelt_v16f16_idx:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
+; RV32-NEXT:    vle16.v v8, (a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
+; RV32-NEXT:    vslidedown.vx v8, v8, a1
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: extractelt_v16f16_idx:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
+; RV64-NEXT:    vle16.v v8, (a0)
+; RV64-NEXT:    vfadd.vv v8, v8, v8
+; RV64-NEXT:    slli a0, a1, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
+; RV64-NEXT:    vslidedown.vx v8, v8, a0
+; RV64-NEXT:    vfmv.f.s fa0, v8
+; RV64-NEXT:    ret
   %a = load <16 x half>, <16 x half>* %x
   %b = fadd <16 x half> %a, %a
   %c = extractelement <16 x half> %b, i32 %idx
@@ -459,15 +580,27 @@
 }
 
 define float @extractelt_v8f32_idx(<8 x float>* %x, i32 signext %idx) nounwind {
-; CHECK-LABEL: extractelt_v8f32_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8
-; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
-; CHECK-NEXT:    vslidedown.vx v8, v8, a1
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: extractelt_v8f32_idx:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8
+; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
+; RV32-NEXT:    vslidedown.vx v8, v8, a1
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: extractelt_v8f32_idx:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
+; RV64-NEXT:    vle32.v v8, (a0)
+; RV64-NEXT:    vfadd.vv v8, v8, v8
+; RV64-NEXT:    slli a0, a1, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
+; RV64-NEXT:    vslidedown.vx v8, v8, a0
+; RV64-NEXT:    vfmv.f.s fa0, v8
+; RV64-NEXT:    ret
   %a = load <8 x float>, <8 x float>* %x
   %b = fadd <8 x float> %a, %a
   %c = extractelement <8 x float> %b, i32 %idx
@@ -475,15 +608,27 @@
 }
 
 define double @extractelt_v4f64_idx(<4 x double>* %x, i32 signext %idx) nounwind {
-; CHECK-LABEL: extractelt_v4f64_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfadd.vv v8, v8, v8
-; CHECK-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
-; CHECK-NEXT:    vslidedown.vx v8, v8, a1
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: extractelt_v4f64_idx:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v8
+; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
+; RV32-NEXT:    vslidedown.vx v8, v8, a1
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: extractelt_v4f64_idx:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    vfadd.vv v8, v8, v8
+; RV64-NEXT:    slli a0, a1, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
+; RV64-NEXT:    vslidedown.vx v8, v8, a0
+; RV64-NEXT:    vfmv.f.s fa0, v8
+; RV64-NEXT:    ret
   %a = load <4 x double>, <4 x double>* %x
   %b = fadd <4 x double> %a, %a
   %c = extractelement <4 x double> %b, i32 %idx
@@ -514,8 +659,10 @@
 ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; RV64-NEXT:    vle64.v v8, (a0)
 ; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    slli a0, a1, 32
+; RV64-NEXT:    srli a0, a0, 32
 ; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
-; RV64-NEXT:    vslidedown.vx v8, v8, a1
+; RV64-NEXT:    vslidedown.vx v8, v8, a0
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    ret
   %a = load <3 x i64>, <3 x i64>* %x
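
In the fixed-vector tests above, RV32 is unchanged because its register width already matches the i32 index, so the previously shared CHECK bodies fork into RV32/RV64 variants and only RV64 gains the zero-extension pair. A sketch of one such test, with an illustrative name:

; Load a fixed vector and extract at a variable unsigned i32 index; only
; the RV64 lowering needs to zero-extend the index first.
define i32 @example_v4i32_idx(<4 x i32>* %x, i32 signext %idx) nounwind {
  %a = load <4 x i32>, <4 x i32>* %x
  %b = extractelement <4 x i32> %a, i32 %idx
  ret i32 %b
}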
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll
@@ -17,34 +17,19 @@
 }
 
 define <1 x i1> @insertelt_idx_v1i1(<1 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind {
-; RV32-LABEL: insertelt_idx_v1i1:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
-; RV32-NEXT:    vmv.s.x v8, a0
-; RV32-NEXT:    vmv.v.i v9, 0
-; RV32-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV32-NEXT:    addi a0, a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
-; RV32-NEXT:    vslideup.vx v9, v8, a1
-; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
-; RV32-NEXT:    vand.vi v8, v9, 1
-; RV32-NEXT:    vmsne.vi v0, v8, 0
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: insertelt_idx_v1i1:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
-; RV64-NEXT:    vmv.s.x v8, a0
-; RV64-NEXT:    vmv.v.i v9, 0
-; RV64-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64-NEXT:    sext.w a0, a1
-; RV64-NEXT:    addi a1, a0, 1
-; RV64-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
-; RV64-NEXT:    vslideup.vx v9, v8, a0
-; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
-; RV64-NEXT:    vand.vi v8, v9, 1
-; RV64-NEXT:    vmsne.vi v0, v8, 0
-; RV64-NEXT:    ret
+; CHECK-LABEL: insertelt_idx_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    vmv.v.i v9, 0
+; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    addi a0, a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; CHECK-NEXT:    vslideup.vx v9, v8, a1
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
   %y = insertelement <1 x i1> %x, i1 %elt, i32 %idx
   ret <1 x i1> %y
 }
@@ -67,34 +52,19 @@
 }
 
 define <2 x i1> @insertelt_idx_v2i1(<2 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind {
-; RV32-LABEL: insertelt_idx_v2i1:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
-; RV32-NEXT:    vmv.s.x v8, a0
-; RV32-NEXT:    vmv.v.i v9, 0
-; RV32-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV32-NEXT:    addi a0, a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
-; RV32-NEXT:    vslideup.vx v9, v8, a1
-; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
-; RV32-NEXT:    vand.vi v8, v9, 1
-; RV32-NEXT:    vmsne.vi v0, v8, 0
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: insertelt_idx_v2i1:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
-; RV64-NEXT:    vmv.s.x v8, a0
-; RV64-NEXT:    vmv.v.i v9, 0
-; RV64-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64-NEXT:    sext.w a0, a1
-; RV64-NEXT:    addi a1, a0, 1
-; RV64-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
-; RV64-NEXT:    vslideup.vx v9, v8, a0
-; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
-; RV64-NEXT:    vand.vi v8, v9, 1
-; RV64-NEXT:    vmsne.vi v0, v8, 0
-; RV64-NEXT:    ret
+; CHECK-LABEL: insertelt_idx_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    vmv.v.i v9, 0
+; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    addi a0, a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; CHECK-NEXT:    vslideup.vx v9, v8, a1
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
   %y = insertelement <2 x i1> %x, i1 %elt, i32 %idx
   ret <2 x i1> %y
 }
@@ -117,34 +87,19 @@
 }
 
 define <8 x i1> @insertelt_idx_v8i1(<8 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind {
-; RV32-LABEL: insertelt_idx_v8i1:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; RV32-NEXT:    vmv.s.x v8, a0
-; RV32-NEXT:    vmv.v.i v9, 0
-; RV32-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV32-NEXT:    addi a0, a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
-; RV32-NEXT:    vslideup.vx v9, v8, a1
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; RV32-NEXT:    vand.vi v8, v9, 1
-; RV32-NEXT:    vmsne.vi v0, v8, 0
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: insertelt_idx_v8i1:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; RV64-NEXT:    vmv.s.x v8, a0
-; RV64-NEXT:    vmv.v.i v9, 0
-; RV64-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64-NEXT:    sext.w a0, a1
-; RV64-NEXT:    addi a1, a0, 1
-; RV64-NEXT:    vsetvli zero, a1, e8, mf2, tu, mu
-; RV64-NEXT:    vslideup.vx v9, v8, a0
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; RV64-NEXT:    vand.vi v8, v9, 1
-; RV64-NEXT:    vmsne.vi v0, v8, 0
-; RV64-NEXT:    ret
+; CHECK-LABEL: insertelt_idx_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    vmv.v.i v9, 0
+; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    addi a0, a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vslideup.vx v9, v8, a1
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
   %y = insertelement <8 x i1> %x, i1 %elt, i32 %idx
   ret <8 x i1> %y
 }
@@ -168,36 +123,23 @@
 }
 
 define <64 x i1> @insertelt_idx_v64i1(<64 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind {
-; RV32-LABEL: insertelt_idx_v64i1:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a2, 64
-; RV32-NEXT:    vsetvli zero, a2, e8, m4, ta, mu
-; RV32-NEXT:    vmv.s.x v8, a0
-; RV32-NEXT:    vmv.v.i v12, 0
-; RV32-NEXT:    vmerge.vim v12, v12, 1, v0
-; RV32-NEXT:    addi a0, a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e8, m4, tu, mu
-; RV32-NEXT:    vslideup.vx v12, v8, a1
-; RV32-NEXT:    vsetvli zero, a2, e8, m4, ta, mu
-; RV32-NEXT:    vand.vi v8, v12, 1
-; RV32-NEXT:    vmsne.vi v0, v8, 0
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: insertelt_idx_v64i1:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a2, 64
-; RV64-NEXT:    vsetvli zero, a2, e8, m4, ta, mu
-; RV64-NEXT:    vmv.s.x v8, a0
-; RV64-NEXT:    vmv.v.i v12, 0
-; RV64-NEXT:    vmerge.vim v12, v12, 1, v0
-; RV64-NEXT:    sext.w a0, a1
-; RV64-NEXT:    addi a1, a0, 1
-; RV64-NEXT:    vsetvli zero, a1, e8, m4, tu, mu
-; RV64-NEXT:    vslideup.vx v12, v8, a0
-; RV64-NEXT:    vsetvli zero, a2, e8, m4, ta, mu
-; RV64-NEXT:    vand.vi v8, v12, 1
-; RV64-NEXT:    vmsne.vi v0, v8, 0
-; RV64-NEXT:    ret
+; CHECK-LABEL: insertelt_idx_v64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 64
+; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, mu
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    vmv.v.i v12, 0
+; CHECK-NEXT:    vmerge.vim v12, v12, 1, v0
+; CHECK-NEXT:    addi a0, a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, tu, mu
+; CHECK-NEXT:    vslideup.vx v12, v8, a1
+; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, mu
+; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
   %y = insertelement <64 x i1> %x, i1 %elt, i32 %idx
   ret <64 x i1> %y
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
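
These i1 tests take the index as i32 zeroext %idx, so the caller has already
cleared the upper bits of a1 on RV64. The zero extension the new unsigned
semantics require is therefore free, the sext.w the old signed lowering needed
disappears, and the RV64 body becomes identical to the RV32 one; the update
script then folds both prefixes into a shared CHECK block. The trailing
RV32/RV64 {{.*}} matches exist only so FileCheck does not error on run lines
whose prefixes no longer check anything.
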
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -110,7 +110,8 @@
 ; RV64-NEXT:    vsetvli zero, a3, e16, m4, ta, mu
 ; RV64-NEXT:    vle16.v v8, (a0)
 ; RV64-NEXT:    vmv.s.x v12, a1
-; RV64-NEXT:    sext.w a1, a2
+; RV64-NEXT:    slli a1, a2, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    addi a2, a1, 1
 ; RV64-NEXT:    vsetvli zero, a2, e16, m4, tu, mu
 ; RV64-NEXT:    vslideup.vx v8, v12, a1
@@ -141,7 +142,8 @@
 ; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; RV64-NEXT:    vle32.v v8, (a0)
 ; RV64-NEXT:    vfmv.s.f v10, fa0
-; RV64-NEXT:    sext.w a1, a1
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    addi a2, a1, 1
 ; RV64-NEXT:    vsetvli zero, a2, e32, m2, tu, mu
 ; RV64-NEXT:    vslideup.vx v8, v10, a1
@@ -190,7 +192,8 @@
 ; RV64-NEXT:    vle64.v v8, (a0)
 ; RV64-NEXT:    li a2, -1
 ; RV64-NEXT:    vmv.s.x v12, a2
-; RV64-NEXT:    sext.w a1, a1
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    addi a2, a1, 1
 ; RV64-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
 ; RV64-NEXT:    vslideup.vx v8, v12, a1
@@ -239,7 +242,8 @@
 ; RV64-NEXT:    vle64.v v8, (a0)
 ; RV64-NEXT:    li a2, 6
 ; RV64-NEXT:    vmv.s.x v12, a2
-; RV64-NEXT:    sext.w a1, a1
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    addi a2, a1, 1
 ; RV64-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
 ; RV64-NEXT:    vslideup.vx v8, v12, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll
@@ -29,6 +29,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v9, a0
@@ -64,6 +66,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v9, a0
@@ -99,6 +103,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v9, a0
@@ -134,6 +140,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v10, fa0
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v10, a0
@@ -169,6 +177,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v12, fa0
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v12, a0
@@ -204,6 +214,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v16, fa0
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v16, a0
@@ -239,6 +251,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v9, a0
@@ -274,6 +288,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v9, a0
@@ -309,6 +325,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v10, fa0
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v10, a0
@@ -344,6 +362,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v12, fa0
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v12, a0
@@ -379,6 +399,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v16, fa0
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v16, a0
@@ -414,6 +436,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v9, a0
@@ -449,6 +473,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v10, fa0
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v10, a0
@@ -484,6 +510,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v12, fa0
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v12, a0
@@ -519,6 +547,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfmv.s.f v16, fa0
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v16, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll
@@ -29,9 +29,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e8, mf8, ta, mu
 ; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v9, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 1 x i8> %v, i8 %elt, i32 %idx
   ret <vscale x 1 x i8> %r
@@ -64,9 +66,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
 ; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v9, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 2 x i8> %v, i8 %elt, i32 %idx
   ret <vscale x 2 x i8> %r
@@ -99,9 +103,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, mu
 ; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v9, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 4 x i8> %v, i8 %elt, i32 %idx
   ret <vscale x 4 x i8> %r
@@ -134,9 +140,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, mu
 ; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v9, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 8 x i8> %v, i8 %elt, i32 %idx
   ret <vscale x 8 x i8> %r
@@ -169,9 +177,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e8, m2, ta, mu
 ; CHECK-NEXT:    vmv.s.x v10, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v10, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v10, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 16 x i8> %v, i8 %elt, i32 %idx
   ret <vscale x 16 x i8> %r
@@ -204,9 +214,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e8, m4, ta, mu
 ; CHECK-NEXT:    vmv.s.x v12, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v12, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v12, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 32 x i8> %v, i8 %elt, i32 %idx
   ret <vscale x 32 x i8> %r
@@ -239,9 +251,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e8, m8, ta, mu
 ; CHECK-NEXT:    vmv.s.x v16, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v16, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v16, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 64 x i8> %v, i8 %elt, i32 %idx
   ret <vscale x 64 x i8> %r
@@ -274,9 +288,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v9, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 1 x i16> %v, i16 %elt, i32 %idx
   ret <vscale x 1 x i16> %r
@@ -309,9 +325,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v9, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 2 x i16> %v, i16 %elt, i32 %idx
   ret <vscale x 2 x i16> %r
@@ -344,9 +362,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v9, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 4 x i16> %v, i16 %elt, i32 %idx
   ret <vscale x 4 x i16> %r
@@ -379,9 +399,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv.s.x v10, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v10, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v10, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 8 x i16> %v, i16 %elt, i32 %idx
   ret <vscale x 8 x i16> %r
@@ -414,9 +436,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv.s.x v12, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v12, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v12, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 16 x i16> %v, i16 %elt, i32 %idx
   ret <vscale x 16 x i16> %r
@@ -449,9 +473,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vmv.s.x v16, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v16, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v16, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 32 x i16> %v, i16 %elt, i32 %idx
   ret <vscale x 32 x i16> %r
@@ -484,9 +510,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v9, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 1 x i32> %v, i32 %elt, i32 %idx
   ret <vscale x 1 x i32> %r
@@ -519,9 +547,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v9, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 2 x i32> %v, i32 %elt, i32 %idx
   ret <vscale x 2 x i32> %r
@@ -554,9 +584,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv.s.x v10, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v10, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v10, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 4 x i32> %v, i32 %elt, i32 %idx
   ret <vscale x 4 x i32> %r
@@ -589,9 +621,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv.s.x v12, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v12, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v12, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 8 x i32> %v, i32 %elt, i32 %idx
   ret <vscale x 8 x i32> %r
@@ -624,9 +658,11 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vmv.s.x v16, a0
-; CHECK-NEXT:    addi a0, a1, 1
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
-; CHECK-NEXT:    vslideup.vx v8, v16, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v16, a0
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 16 x i32> %v, i32 %elt, i32 %idx
   ret <vscale x 16 x i32> %r
@@ -659,7 +695,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    sext.w a0, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v9, a0
@@ -695,7 +732,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv.s.x v10, a0
-; CHECK-NEXT:    sext.w a0, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v10, a0
@@ -731,7 +769,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv.s.x v12, a0
-; CHECK-NEXT:    sext.w a0, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v12, a0
@@ -767,7 +806,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vmv.s.x v16, a0
-; CHECK-NEXT:    sext.w a0, a1
+; CHECK-NEXT:    slli a0, a1, 32
+; CHECK-NEXT:    srli a0, a0, 32
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v16, a0
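
Across the two insertelt-*-rv64.ll files above, the change is a small accepted
code-size regression for scalable vectors: the e8 through e32 cases previously
used a1 unmodified and now pay the two-instruction zero extension, and the e64
cases trade a one-instruction sext.w for the same two-instruction pair.
Presumably this is recoverable later, either via Zba's zext.w or by proving
that the index's upper bits are already zero.
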
diff --git a/llvm/test/CodeGen/VE/Vector/extract_elt.ll b/llvm/test/CodeGen/VE/Vector/extract_elt.ll
--- a/llvm/test/CodeGen/VE/Vector/extract_elt.ll
+++ b/llvm/test/CodeGen/VE/Vector/extract_elt.ll
@@ -6,6 +6,7 @@
 define fastcc i64 @extract_rr_v256i64(i32 signext %idx, <256 x i64> %v) {
 ; CHECK-LABEL: extract_rr_v256i64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    lvs %s0, %v0(%s0)
 ; CHECK-NEXT:    b.l.t (, %s10)
   %ret = extractelement <256 x i64> %v, i32 %idx
@@ -45,6 +46,7 @@
 define fastcc i32 @extract_rr_v256i32(i32 signext %idx, <256 x i32> %v) {
 ; CHECK-LABEL: extract_rr_v256i32:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    lvs %s0, %v0(%s0)
 ; CHECK-NEXT:    b.l.t (, %s10)
   %ret = extractelement <256 x i32> %v, i32 %idx
@@ -84,7 +86,10 @@
 define fastcc i32 @extract_rr_v512i32(<512 x i32> %v, i32 signext %idx) {
 ; CHECK-LABEL: extract_rr_v512i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    srl %s1, %s0, 1
+; CHECK-NEXT:    lea %s1, -2
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    and %s1, %s0, %s1
+; CHECK-NEXT:    srl %s1, %s1, 1
 ; CHECK-NEXT:    lvs %s1, %v0(%s1)
 ; CHECK-NEXT:    nnd %s0, %s0, (63)0
 ; CHECK-NEXT:    sla.w.sx %s0, %s0, 5
@@ -100,6 +105,7 @@
 define fastcc double @extract_rr_v256f64(i32 signext %idx, <256 x double> %v) {
 ; CHECK-LABEL: extract_rr_v256f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    lvs %s0, %v0(%s0)
 ; CHECK-NEXT:    b.l.t (, %s10)
   %ret = extractelement <256 x double> %v, i32 %idx
@@ -139,6 +145,7 @@
 define fastcc float @extract_rr_v256f32(i32 signext %idx, <256 x float> %v) {
 ; CHECK-LABEL: extract_rr_v256f32:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    lvs %s0, %v0(%s0)
 ; CHECK-NEXT:    b.l.t (, %s10)
   %ret = extractelement <256 x float> %v, i32 %idx
@@ -179,7 +186,10 @@
 define fastcc float @extract_rr_v512f32(<512 x float> %v, i32 signext %idx) {
 ; CHECK-LABEL: extract_rr_v512f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    srl %s1, %s0, 1
+; CHECK-NEXT:    lea %s1, -2
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    and %s1, %s0, %s1
+; CHECK-NEXT:    srl %s1, %s1, 1
 ; CHECK-NEXT:    lvs %s1, %v0(%s1)
 ; CHECK-NEXT:    nnd %s0, %s0, (63)0
 ; CHECK-NEXT:    sla.w.sx %s0, %s0, 5
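
The VE diffs spell the same zero extension in that ISA's idiom:
and %s0, %s0, (32)0 masks with VE's (32)0 immediate, 32 zero bits followed by
32 one bits, i.e. 0x00000000ffffffff. The packed v512 cases need one more
step: two 32-bit elements share each 64-bit vector slot, so the slot number is
the zero-extended index halved, and the old single srl %s1, %s0, 1 grows into
a mask-then-shift sequence. A C++ sketch of what the new four instructions
compute (illustrative only; the helper name is invented):

    #include <cstdint>

    // lea  %s1, -2          ->  0xfffffffffffffffe
    // and  %s1, %s1, (32)0  ->  0x00000000fffffffe
    // and  %s1, %s0, %s1    ->  idx & 0xfffffffe
    // srl  %s1, %s1, 1      ->  zext32(idx) / 2
    uint64_t packedSlot(int64_t idx) {
      uint64_t masked = (uint64_t)idx & 0xfffffffe; // clear bit 0 and bits 63:32
      return masked >> 1;
    }

Clearing bit 0 before the shift is harmless, since the shift discards it; the
sequence is exactly "zero-extend, then divide by two".
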
diff --git a/llvm/test/CodeGen/VE/Vector/insert_elt.ll b/llvm/test/CodeGen/VE/Vector/insert_elt.ll
--- a/llvm/test/CodeGen/VE/Vector/insert_elt.ll
+++ b/llvm/test/CodeGen/VE/Vector/insert_elt.ll
@@ -6,6 +6,7 @@
 define fastcc <256 x i64> @insert_rr_v256i64(i32 signext %idx, i64 %s) {
 ; CHECK-LABEL: insert_rr_v256i64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    lsv %v0(%s0), %s1
 ; CHECK-NEXT:    b.l.t (, %s10)
   %ret = insertelement <256 x i64> undef, i64 %s, i32 %idx
@@ -46,6 +47,7 @@
 ; CHECK-LABEL: insert_rr_v256i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    lsv %v0(%s0), %s1
 ; CHECK-NEXT:    b.l.t (, %s10)
   %ret = insertelement <256 x i32> undef, i32 %s, i32 %idx
@@ -94,6 +96,9 @@
 ; CHECK-NEXT:    nnd %s2, %s0, (63)0
 ; CHECK-NEXT:    sla.w.sx %s2, %s2, 5
 ; CHECK-NEXT:    sll %s1, %s1, %s2
+; CHECK-NEXT:    lea %s3, -2
+; CHECK-NEXT:    and %s3, %s3, (32)0
+; CHECK-NEXT:    and %s0, %s0, %s3
 ; CHECK-NEXT:    srl %s0, %s0, 1
 ; CHECK-NEXT:    lvs %s3, %v0(%s0)
 ; CHECK-NEXT:    srl %s2, (32)1, %s2
@@ -110,6 +115,7 @@
 define fastcc <256 x double> @insert_rr_v256f64(i32 signext %idx, double %s) {
 ; CHECK-LABEL: insert_rr_v256f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    lsv %v0(%s0), %s1
 ; CHECK-NEXT:    b.l.t (, %s10)
   %ret = insertelement <256 x double> undef, double %s, i32 %idx
@@ -149,6 +155,7 @@
 define fastcc <256 x float> @insert_rr_v256f32(i32 signext %idx, float %s) {
 ; CHECK-LABEL: insert_rr_v256f32:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    lsv %v0(%s0), %s1
 ; CHECK-NEXT:    b.l.t (, %s10)
   %ret = insertelement <256 x float> undef, float %s, i32 %idx
@@ -193,7 +200,10 @@
 ; CHECK-LABEL: insert_rr_v512f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sra.l %s1, %s1, 32
-; CHECK-NEXT:    srl %s2, %s0, 1
+; CHECK-NEXT:    lea %s2, -2
+; CHECK-NEXT:    and %s2, %s2, (32)0
+; CHECK-NEXT:    and %s2, %s0, %s2
+; CHECK-NEXT:    srl %s2, %s2, 1
 ; CHECK-NEXT:    lvs %s3, %v0(%s2)
 ; CHECK-NEXT:    nnd %s0, %s0, (63)0
 ; CHECK-NEXT:    sla.w.sx %s0, %s0, 5
diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
--- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
@@ -97,8 +97,17 @@
 ; CHECK-LABEL: swizzle_one_i8x16:
 ; CHECK:         .functype swizzle_one_i8x16 (v128, v128) -> (v128)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i8x16.swizzle $push0=, $0, $1
-; CHECK-NEXT:    return $pop0
+; CHECK-NEXT:    global.get $push5=, __stack_pointer
+; CHECK-NEXT:    i32.const $push6=, 16
+; CHECK-NEXT:    i32.sub $push8=, $pop5, $pop6
+; CHECK-NEXT:    local.tee $push7=, $2=, $pop8
+; CHECK-NEXT:    v128.store 0($pop7), $0
+; CHECK-NEXT:    i8x16.extract_lane_u $push0=, $1, 0
+; CHECK-NEXT:    i32.const $push1=, 15
+; CHECK-NEXT:    i32.and $push2=, $pop0, $pop1
+; CHECK-NEXT:    i32.or $push3=, $2, $pop2
+; CHECK-NEXT:    v128.load8_splat $push4=, 0($pop3)
+; CHECK-NEXT:    return $pop4
   %m0 = extractelement <16 x i8> %mask, i32 0
   %s0 = extractelement <16 x i8> %src, i8 %m0
   %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
@@ -109,8 +118,107 @@
 ; CHECK-LABEL: swizzle_all_i8x16:
 ; CHECK:         .functype swizzle_all_i8x16 (v128, v128) -> (v128)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i8x16.swizzle $push0=, $0, $1
-; CHECK-NEXT:    return $pop0
+; CHECK-NEXT:    global.get $push80=, __stack_pointer
+; CHECK-NEXT:    i32.const $push81=, 16
+; CHECK-NEXT:    i32.sub $push98=, $pop80, $pop81
+; CHECK-NEXT:    local.tee $push97=, $2=, $pop98
+; CHECK-NEXT:    v128.store 0($pop97), $0
+; CHECK-NEXT:    i8x16.extract_lane_u $push61=, $1, 0
+; CHECK-NEXT:    i32.const $push1=, 15
+; CHECK-NEXT:    i32.and $push62=, $pop61, $pop1
+; CHECK-NEXT:    i32.or $push63=, $2, $pop62
+; CHECK-NEXT:    v128.load8_splat $push64=, 0($pop63)
+; CHECK-NEXT:    i8x16.extract_lane_u $push57=, $1, 1
+; CHECK-NEXT:    i32.const $push96=, 15
+; CHECK-NEXT:    i32.and $push58=, $pop57, $pop96
+; CHECK-NEXT:    i32.or $push59=, $2, $pop58
+; CHECK-NEXT:    i32.load8_u $push60=, 0($pop59)
+; CHECK-NEXT:    i8x16.replace_lane $push65=, $pop64, 1, $pop60
+; CHECK-NEXT:    i8x16.extract_lane_u $push53=, $1, 2
+; CHECK-NEXT:    i32.const $push95=, 15
+; CHECK-NEXT:    i32.and $push54=, $pop53, $pop95
+; CHECK-NEXT:    i32.or $push55=, $2, $pop54
+; CHECK-NEXT:    i32.load8_u $push56=, 0($pop55)
+; CHECK-NEXT:    i8x16.replace_lane $push66=, $pop65, 2, $pop56
+; CHECK-NEXT:    i8x16.extract_lane_u $push49=, $1, 3
+; CHECK-NEXT:    i32.const $push94=, 15
+; CHECK-NEXT:    i32.and $push50=, $pop49, $pop94
+; CHECK-NEXT:    i32.or $push51=, $2, $pop50
+; CHECK-NEXT:    i32.load8_u $push52=, 0($pop51)
+; CHECK-NEXT:    i8x16.replace_lane $push67=, $pop66, 3, $pop52
+; CHECK-NEXT:    i8x16.extract_lane_u $push45=, $1, 4
+; CHECK-NEXT:    i32.const $push93=, 15
+; CHECK-NEXT:    i32.and $push46=, $pop45, $pop93
+; CHECK-NEXT:    i32.or $push47=, $2, $pop46
+; CHECK-NEXT:    i32.load8_u $push48=, 0($pop47)
+; CHECK-NEXT:    i8x16.replace_lane $push68=, $pop67, 4, $pop48
+; CHECK-NEXT:    i8x16.extract_lane_u $push41=, $1, 5
+; CHECK-NEXT:    i32.const $push92=, 15
+; CHECK-NEXT:    i32.and $push42=, $pop41, $pop92
+; CHECK-NEXT:    i32.or $push43=, $2, $pop42
+; CHECK-NEXT:    i32.load8_u $push44=, 0($pop43)
+; CHECK-NEXT:    i8x16.replace_lane $push69=, $pop68, 5, $pop44
+; CHECK-NEXT:    i8x16.extract_lane_u $push37=, $1, 6
+; CHECK-NEXT:    i32.const $push91=, 15
+; CHECK-NEXT:    i32.and $push38=, $pop37, $pop91
+; CHECK-NEXT:    i32.or $push39=, $2, $pop38
+; CHECK-NEXT:    i32.load8_u $push40=, 0($pop39)
+; CHECK-NEXT:    i8x16.replace_lane $push70=, $pop69, 6, $pop40
+; CHECK-NEXT:    i8x16.extract_lane_u $push33=, $1, 7
+; CHECK-NEXT:    i32.const $push90=, 15
+; CHECK-NEXT:    i32.and $push34=, $pop33, $pop90
+; CHECK-NEXT:    i32.or $push35=, $2, $pop34
+; CHECK-NEXT:    i32.load8_u $push36=, 0($pop35)
+; CHECK-NEXT:    i8x16.replace_lane $push71=, $pop70, 7, $pop36
+; CHECK-NEXT:    i8x16.extract_lane_u $push29=, $1, 8
+; CHECK-NEXT:    i32.const $push89=, 15
+; CHECK-NEXT:    i32.and $push30=, $pop29, $pop89
+; CHECK-NEXT:    i32.or $push31=, $2, $pop30
+; CHECK-NEXT:    i32.load8_u $push32=, 0($pop31)
+; CHECK-NEXT:    i8x16.replace_lane $push72=, $pop71, 8, $pop32
+; CHECK-NEXT:    i8x16.extract_lane_u $push25=, $1, 9
+; CHECK-NEXT:    i32.const $push88=, 15
+; CHECK-NEXT:    i32.and $push26=, $pop25, $pop88
+; CHECK-NEXT:    i32.or $push27=, $2, $pop26
+; CHECK-NEXT:    i32.load8_u $push28=, 0($pop27)
+; CHECK-NEXT:    i8x16.replace_lane $push73=, $pop72, 9, $pop28
+; CHECK-NEXT:    i8x16.extract_lane_u $push21=, $1, 10
+; CHECK-NEXT:    i32.const $push87=, 15
+; CHECK-NEXT:    i32.and $push22=, $pop21, $pop87
+; CHECK-NEXT:    i32.or $push23=, $2, $pop22
+; CHECK-NEXT:    i32.load8_u $push24=, 0($pop23)
+; CHECK-NEXT:    i8x16.replace_lane $push74=, $pop73, 10, $pop24
+; CHECK-NEXT:    i8x16.extract_lane_u $push17=, $1, 11
+; CHECK-NEXT:    i32.const $push86=, 15
+; CHECK-NEXT:    i32.and $push18=, $pop17, $pop86
+; CHECK-NEXT:    i32.or $push19=, $2, $pop18
+; CHECK-NEXT:    i32.load8_u $push20=, 0($pop19)
+; CHECK-NEXT:    i8x16.replace_lane $push75=, $pop74, 11, $pop20
+; CHECK-NEXT:    i8x16.extract_lane_u $push13=, $1, 12
+; CHECK-NEXT:    i32.const $push85=, 15
+; CHECK-NEXT:    i32.and $push14=, $pop13, $pop85
+; CHECK-NEXT:    i32.or $push15=, $2, $pop14
+; CHECK-NEXT:    i32.load8_u $push16=, 0($pop15)
+; CHECK-NEXT:    i8x16.replace_lane $push76=, $pop75, 12, $pop16
+; CHECK-NEXT:    i8x16.extract_lane_u $push9=, $1, 13
+; CHECK-NEXT:    i32.const $push84=, 15
+; CHECK-NEXT:    i32.and $push10=, $pop9, $pop84
+; CHECK-NEXT:    i32.or $push11=, $2, $pop10
+; CHECK-NEXT:    i32.load8_u $push12=, 0($pop11)
+; CHECK-NEXT:    i8x16.replace_lane $push77=, $pop76, 13, $pop12
+; CHECK-NEXT:    i8x16.extract_lane_u $push5=, $1, 14
+; CHECK-NEXT:    i32.const $push83=, 15
+; CHECK-NEXT:    i32.and $push6=, $pop5, $pop83
+; CHECK-NEXT:    i32.or $push7=, $2, $pop6
+; CHECK-NEXT:    i32.load8_u $push8=, 0($pop7)
+; CHECK-NEXT:    i8x16.replace_lane $push78=, $pop77, 14, $pop8
+; CHECK-NEXT:    i8x16.extract_lane_u $push0=, $1, 15
+; CHECK-NEXT:    i32.const $push82=, 15
+; CHECK-NEXT:    i32.and $push2=, $pop0, $pop82
+; CHECK-NEXT:    i32.or $push3=, $2, $pop2
+; CHECK-NEXT:    i32.load8_u $push4=, 0($pop3)
+; CHECK-NEXT:    i8x16.replace_lane $push79=, $pop78, 15, $pop4
+; CHECK-NEXT:    return $pop79
   %m0 = extractelement <16 x i8> %mask, i32 0
   %s0 = extractelement <16 x i8> %src, i8 %m0
   %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
@@ -210,14 +318,25 @@
 ; CHECK-LABEL: mashup_swizzle_i8x16:
 ; CHECK:         .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i8x16.swizzle $push0=, $0, $1
-; CHECK-NEXT:    i8x16.replace_lane $push1=, $pop0, 3, $2
-; CHECK-NEXT:    i32.const $push2=, 42
-; CHECK-NEXT:    i8x16.replace_lane $push3=, $pop1, 4, $pop2
-; CHECK-NEXT:    i8x16.replace_lane $push4=, $pop3, 12, $2
-; CHECK-NEXT:    i32.const $push6=, 42
-; CHECK-NEXT:    i8x16.replace_lane $push5=, $pop4, 14, $pop6
-; CHECK-NEXT:    return $pop5
+; CHECK-NEXT:    global.get $push12=, __stack_pointer
+; CHECK-NEXT:    i32.const $push13=, 16
+; CHECK-NEXT:    i32.sub $push16=, $pop12, $pop13
+; CHECK-NEXT:    local.tee $push15=, $3=, $pop16
+; CHECK-NEXT:    v128.store 0($pop15), $0
+; CHECK-NEXT:    i8x16.extract_lane_u $push7=, $1, 7
+; CHECK-NEXT:    i32.const $push1=, 15
+; CHECK-NEXT:    i32.and $push8=, $pop7, $pop1
+; CHECK-NEXT:    i32.or $push9=, $3, $pop8
+; CHECK-NEXT:    i8x16.extract_lane_u $push0=, $1, 0
+; CHECK-NEXT:    i32.const $push14=, 15
+; CHECK-NEXT:    i32.and $push2=, $pop0, $pop14
+; CHECK-NEXT:    i32.or $push3=, $3, $pop2
+; CHECK-NEXT:    v128.const $push4=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
+; CHECK-NEXT:    v128.load8_lane $push5=, 0($pop3), $pop4, 0
+; CHECK-NEXT:    i8x16.replace_lane $push6=, $pop5, 3, $2
+; CHECK-NEXT:    v128.load8_lane $push10=, 0($pop9), $pop6, 7
+; CHECK-NEXT:    i8x16.replace_lane $push11=, $pop10, 12, $2
+; CHECK-NEXT:    return $pop11
   %m0 = extractelement <16 x i8> %mask, i32 0
   %s0 = extractelement <16 x i8> %src, i8 %m0
   %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
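
The WebAssembly fallout is the starkest. i8x16.swizzle tolerates out-of-range
selectors on its own (any lane index of 16 or more yields 0), and the old
lowering matched this whole extract/insert chain to that single instruction.
With the zero-extended index the pattern evidently no longer fires, so codegen
falls back to spilling the source vector to a stack slot and rebuilding the
result lane by lane, masking each selector with 15 to keep the scalar loads in
bounds. The two per-lane behaviors, sketched in C++ for contrast (illustrative
only; the helper names are invented):

    #include <cstdint>

    // What i8x16.swizzle computes per lane: out-of-range selects 0.
    uint8_t swizzleLane(const uint8_t src[16], uint8_t sel) {
      return sel < 16 ? src[sel] : 0;
    }

    // What the scalarized fallback computes: the masked selector wraps
    // around instead of producing 0 when it is out of range.
    uint8_t fallbackLane(const uint8_t src[16], uint8_t sel) {
      return src[sel & 15];
    }

Both are valid lowerings, because an out-of-range extractelement is poison in
IR, but the fallback is dramatically larger; restoring the swizzle match looks
like an obvious follow-up.
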
diff --git a/llvm/test/CodeGen/X86/extract-insert.ll b/llvm/test/CodeGen/X86/extract-insert.ll
--- a/llvm/test/CodeGen/X86/extract-insert.ll
+++ b/llvm/test/CodeGen/X86/extract-insert.ll
@@ -5,6 +5,7 @@
 define i32 @extractelt_undef_insertelt(i32 %x, i32 %y) {
 ; CHECK-LABEL: extractelt_undef_insertelt:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    ret{{[l|q]}}
   %b = insertelement <4 x i32> zeroinitializer, i32 %x, i64 3
   %c = icmp uge i32 %y, %y
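
A side effect worth flagging in this test: the function previously returned
with %eax untouched (the undef result lowered to no code at all), and now
materializes a defined zero before returning.
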
diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll
--- a/llvm/test/CodeGen/X86/insertelement-var-index.ll
+++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll
@@ -996,7 +996,7 @@
 ;
 ; AVX512-LABEL: arg_i64_v2i64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    movslq %esi, %rax
+; AVX512-NEXT:    movl %esi, %eax
 ; AVX512-NEXT:    vpbroadcastq %rax, %xmm1
 ; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
 ; AVX512-NEXT:    vpbroadcastq %rdi, %xmm0 {%k1}
@@ -1101,7 +1101,7 @@
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    movapd %xmm0, %xmm2
 ; SSE41-NEXT:    movddup {{.*#+}} xmm1 = xmm1[0,0]
-; SSE41-NEXT:    movslq %edi, %rax
+; SSE41-NEXT:    movl %edi, %eax
 ; SSE41-NEXT:    movq %rax, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; SSE41-NEXT:    pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -1112,7 +1112,7 @@
 ; AVX1-LABEL: arg_f64_v2f64:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
-; AVX1-NEXT:    movslq %edi, %rax
+; AVX1-NEXT:    movl %edi, %eax
 ; AVX1-NEXT:    vmovq %rax, %xmm2
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
 ; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
@@ -1122,7 +1122,7 @@
 ; AVX2-LABEL: arg_f64_v2f64:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
-; AVX2-NEXT:    movslq %edi, %rax
+; AVX2-NEXT:    movl %edi, %eax
 ; AVX2-NEXT:    vmovq %rax, %xmm2
 ; AVX2-NEXT:    vpbroadcastq %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
@@ -1131,7 +1131,7 @@
 ;
 ; AVX512-LABEL: arg_f64_v2f64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    movslq %edi, %rax
+; AVX512-NEXT:    movl %edi, %eax
 ; AVX512-NEXT:    vpbroadcastq %rax, %xmm2
 ; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1
 ; AVX512-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
@@ -1346,7 +1346,7 @@
 ;
 ; AVX512-LABEL: load_i64_v2i64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    movslq %esi, %rax
+; AVX512-NEXT:    movl %esi, %eax
 ; AVX512-NEXT:    vpbroadcastq %rax, %xmm1
 ; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
 ; AVX512-NEXT:    vpbroadcastq (%rdi), %xmm0 {%k1}
@@ -1458,7 +1458,7 @@
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    movapd %xmm0, %xmm1
 ; SSE41-NEXT:    movddup {{.*#+}} xmm2 = mem[0,0]
-; SSE41-NEXT:    movslq %esi, %rax
+; SSE41-NEXT:    movl %esi, %eax
 ; SSE41-NEXT:    movq %rax, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; SSE41-NEXT:    pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -1469,7 +1469,7 @@
 ; AVX1-LABEL: load_f64_v2f64:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
-; AVX1-NEXT:    movslq %esi, %rax
+; AVX1-NEXT:    movl %esi, %eax
 ; AVX1-NEXT:    vmovq %rax, %xmm2
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
 ; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
@@ -1479,7 +1479,7 @@
 ; AVX2-LABEL: load_f64_v2f64:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
-; AVX2-NEXT:    movslq %esi, %rax
+; AVX2-NEXT:    movl %esi, %eax
 ; AVX2-NEXT:    vmovq %rax, %xmm2
 ; AVX2-NEXT:    vpbroadcastq %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
@@ -1488,7 +1488,7 @@
 ;
 ; AVX512-LABEL: load_f64_v2f64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    movslq %esi, %rax
+; AVX512-NEXT:    movl %esi, %eax
 ; AVX512-NEXT:    vpbroadcastq %rax, %xmm1
 ; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
 ; AVX512-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
@@ -1733,7 +1733,7 @@
 ;
 ; AVX512-LABEL: arg_i64_v4i64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    movslq %esi, %rax
+; AVX512-NEXT:    movl %esi, %eax
 ; AVX512-NEXT:    vpbroadcastq %rax, %ymm1
 ; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
 ; AVX512-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1}
@@ -1834,7 +1834,7 @@
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
-; AVX1-NEXT:    movslq %edi, %rax
+; AVX1-NEXT:    movl %edi, %eax
 ; AVX1-NEXT:    vmovq %rax, %xmm2
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
 ; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
@@ -1846,7 +1846,7 @@
 ; AVX2-LABEL: arg_f64_v4f64:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
-; AVX2-NEXT:    movslq %edi, %rax
+; AVX2-NEXT:    movl %edi, %eax
 ; AVX2-NEXT:    vmovq %rax, %xmm2
 ; AVX2-NEXT:    vpbroadcastq %xmm2, %ymm2
 ; AVX2-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
@@ -1855,7 +1855,7 @@
 ;
 ; AVX512-LABEL: arg_f64_v4f64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    movslq %edi, %rax
+; AVX512-NEXT:    movl %edi, %eax
 ; AVX512-NEXT:    vpbroadcastq %rax, %ymm2
 ; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %k1
 ; AVX512-NEXT:    vbroadcastsd %xmm1, %ymm0 {%k1}
@@ -2114,7 +2114,7 @@
 ;
 ; AVX512-LABEL: load_i64_v4i64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    movslq %esi, %rax
+; AVX512-NEXT:    movl %esi, %eax
 ; AVX512-NEXT:    vpbroadcastq %rax, %ymm1
 ; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
 ; AVX512-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1}
@@ -2218,7 +2218,7 @@
 ;
 ; AVX1-LABEL: load_f64_v4f64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    movslq %esi, %rax
+; AVX1-NEXT:    movl %esi, %eax
 ; AVX1-NEXT:    vmovq %rax, %xmm1
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
 ; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
@@ -2231,7 +2231,7 @@
 ; AVX2-LABEL: load_f64_v4f64:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm1
-; AVX2-NEXT:    movslq %esi, %rax
+; AVX2-NEXT:    movl %esi, %eax
 ; AVX2-NEXT:    vmovq %rax, %xmm2
 ; AVX2-NEXT:    vpbroadcastq %xmm2, %ymm2
 ; AVX2-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
@@ -2240,7 +2240,7 @@
 ;
 ; AVX512-LABEL: load_f64_v4f64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    movslq %esi, %rax
+; AVX512-NEXT:    movl %esi, %eax
 ; AVX512-NEXT:    vpbroadcastq %rax, %ymm1
 ; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
 ; AVX512-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1}
@@ -2273,6 +2273,15 @@
 ; SSE-LABEL: PR44139:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movl (%rdi), %eax
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
+; SSE-NEXT:    movdqa %xmm0, 96(%rdi)
+; SSE-NEXT:    movdqa %xmm0, 112(%rdi)
+; SSE-NEXT:    movdqa %xmm0, 64(%rdi)
+; SSE-NEXT:    movdqa %xmm0, 80(%rdi)
+; SSE-NEXT:    movdqa %xmm0, 32(%rdi)
+; SSE-NEXT:    movdqa %xmm0, 48(%rdi)
+; SSE-NEXT:    movdqa %xmm0, (%rdi)
+; SSE-NEXT:    movdqa %xmm0, 16(%rdi)
 ; SSE-NEXT:    leal 2147483647(%rax), %ecx
 ; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    cmovnsl %eax, %ecx
@@ -2283,23 +2292,51 @@
 ; SSE-NEXT:    divl %ecx
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: PR44139:
-; AVX:       # %bb.0:
-; AVX-NEXT:    movl (%rdi), %eax
-; AVX-NEXT:    leal 2147483647(%rax), %ecx
-; AVX-NEXT:    testl %eax, %eax
-; AVX-NEXT:    cmovnsl %eax, %ecx
-; AVX-NEXT:    andl $-2147483648, %ecx # imm = 0x80000000
-; AVX-NEXT:    addl %eax, %ecx
-; AVX-NEXT:    # kill: def $eax killed $eax killed $rax
-; AVX-NEXT:    xorl %edx, %edx
-; AVX-NEXT:    divl %ecx
-; AVX-NEXT:    retq
+; AVX1OR2-LABEL: PR44139:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    vbroadcastsd (%rdi), %ymm0
+; AVX1OR2-NEXT:    movl (%rdi), %eax
+; AVX1OR2-NEXT:    vmovaps %ymm0, 64(%rdi)
+; AVX1OR2-NEXT:    vmovaps %ymm0, 96(%rdi)
+; AVX1OR2-NEXT:    vmovaps %ymm0, (%rdi)
+; AVX1OR2-NEXT:    vmovaps %ymm0, 32(%rdi)
+; AVX1OR2-NEXT:    leal 2147483647(%rax), %ecx
+; AVX1OR2-NEXT:    testl %eax, %eax
+; AVX1OR2-NEXT:    cmovnsl %eax, %ecx
+; AVX1OR2-NEXT:    andl $-2147483648, %ecx # imm = 0x80000000
+; AVX1OR2-NEXT:    addl %eax, %ecx
+; AVX1OR2-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX1OR2-NEXT:    xorl %edx, %edx
+; AVX1OR2-NEXT:    divl %ecx
+; AVX1OR2-NEXT:    vzeroupper
+; AVX1OR2-NEXT:    retq
+;
+; AVX512-LABEL: PR44139:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vbroadcastsd (%rdi), %zmm0
+; AVX512-NEXT:    movl (%rdi), %eax
+; AVX512-NEXT:    vmovaps %zmm0, (%rdi)
+; AVX512-NEXT:    vmovaps %zmm0, 64(%rdi)
+; AVX512-NEXT:    leal 2147483647(%rax), %ecx
+; AVX512-NEXT:    testl %eax, %eax
+; AVX512-NEXT:    cmovnsl %eax, %ecx
+; AVX512-NEXT:    andl $-2147483648, %ecx # imm = 0x80000000
+; AVX512-NEXT:    addl %eax, %ecx
+; AVX512-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT:    xorl %edx, %edx
+; AVX512-NEXT:    divl %ecx
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
 ;
 ; X86AVX2-LABEL: PR44139:
 ; X86AVX2:       # %bb.0:
-; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86AVX2-NEXT:    movl (%eax), %eax
+; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86AVX2-NEXT:    movl (%ecx), %eax
+; X86AVX2-NEXT:    vbroadcastsd (%ecx), %ymm0
+; X86AVX2-NEXT:    vmovaps %ymm0, 64(%ecx)
+; X86AVX2-NEXT:    vmovaps %ymm0, 96(%ecx)
+; X86AVX2-NEXT:    vmovaps %ymm0, (%ecx)
+; X86AVX2-NEXT:    vmovaps %ymm0, 32(%ecx)
 ; X86AVX2-NEXT:    leal 2147483647(%eax), %ecx
 ; X86AVX2-NEXT:    testl %eax, %eax
 ; X86AVX2-NEXT:    cmovnsl %eax, %ecx
@@ -2307,6 +2344,7 @@
 ; X86AVX2-NEXT:    addl %eax, %ecx
 ; X86AVX2-NEXT:    xorl %edx, %edx
 ; X86AVX2-NEXT:    divl %ecx
+; X86AVX2-NEXT:    vzeroupper
 ; X86AVX2-NEXT:    retl
   %L = load <16 x i64>, ptr %p
   %E1 = extractelement <16 x i64> %L, i64 0
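
On x86-64 the switch reads as a mild improvement for these variable-index
tests: movslq %esi, %rax needed a dedicated sign-extending instruction, while
plain movl %esi, %eax zero-extends into %rax for free (32-bit writes always
clear the upper half of the destination register) and has a shorter encoding.
The PR44139 diffs are a separate effect: the broadcast vector stores that were
previously optimized away are now emitted, apparently because whatever fold
deleted them relied on the signed-index assumption, and the ymm/zmm usage
brings the matching vzeroupper with it.
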
diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll
--- a/llvm/test/CodeGen/X86/var-permute-128.ll
+++ b/llvm/test/CodeGen/X86/var-permute-128.ll
@@ -129,7 +129,7 @@
 define <8 x i16> @var_shuffle_v8i16(<8 x i16> %v, <8 x i16> %indices) nounwind {
 ; SSE3-LABEL: var_shuffle_v8i16:
 ; SSE3:       # %bb.0:
-; SSE3-NEXT:    movd %xmm1, %r8d
+; SSE3-NEXT:    pextrw $0, %xmm1, %r8d
 ; SSE3-NEXT:    pextrw $1, %xmm1, %r9d
 ; SSE3-NEXT:    pextrw $2, %xmm1, %r10d
 ; SSE3-NEXT:    pextrw $3, %xmm1, %esi
diff --git a/llvm/test/CodeGen/X86/var-permute-512.ll b/llvm/test/CodeGen/X86/var-permute-512.ll
--- a/llvm/test/CodeGen/X86/var-permute-512.ll
+++ b/llvm/test/CodeGen/X86/var-permute-512.ll
@@ -101,7 +101,7 @@
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
 ; AVX512F-NEXT:    vextracti32x4 $3, %zmm1, %xmm4
-; AVX512F-NEXT:    vmovd %xmm4, %eax
+; AVX512F-NEXT:    vpextrw $0, %xmm4, %eax
 ; AVX512F-NEXT:    vmovaps %zmm0, (%rsp)
 ; AVX512F-NEXT:    andl $31, %eax
 ; AVX512F-NEXT:    movzwl (%rsp,%rax,2), %eax
@@ -127,7 +127,7 @@
 ; AVX512F-NEXT:    vpextrw $7, %xmm4, %eax
 ; AVX512F-NEXT:    andl $31, %eax
 ; AVX512F-NEXT:    vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
-; AVX512F-NEXT:    vmovd %xmm3, %eax
+; AVX512F-NEXT:    vpextrw $0, %xmm3, %eax
 ; AVX512F-NEXT:    andl $31, %eax
 ; AVX512F-NEXT:    movzwl (%rsp,%rax,2), %eax
 ; AVX512F-NEXT:    vmovd %eax, %xmm4
@@ -152,7 +152,7 @@
 ; AVX512F-NEXT:    vpextrw $7, %xmm3, %eax
 ; AVX512F-NEXT:    andl $31, %eax
 ; AVX512F-NEXT:    vpinsrw $7, (%rsp,%rax,2), %xmm4, %xmm3
-; AVX512F-NEXT:    vmovd %xmm2, %eax
+; AVX512F-NEXT:    vpextrw $0, %xmm2, %eax
 ; AVX512F-NEXT:    andl $31, %eax
 ; AVX512F-NEXT:    movzwl (%rsp,%rax,2), %eax
 ; AVX512F-NEXT:    vmovd %eax, %xmm4
@@ -180,7 +180,7 @@
 ; AVX512F-NEXT:    andl $31, %eax
 ; AVX512F-NEXT:    movzwl (%rsp,%rax,2), %eax
 ; AVX512F-NEXT:    vpinsrw $7, %eax, %xmm4, %xmm2
-; AVX512F-NEXT:    vmovd %xmm1, %eax
+; AVX512F-NEXT:    vpextrw $0, %xmm1, %eax
 ; AVX512F-NEXT:    andl $31, %eax
 ; AVX512F-NEXT:    movzwl (%rsp,%rax,2), %eax
 ; AVX512F-NEXT:    vmovd %eax, %xmm4
@@ -330,7 +330,7 @@
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
 ; AVX512F-NEXT:    vextracti32x4 $3, %zmm1, %xmm4
-; AVX512F-NEXT:    vmovd %xmm4, %eax
+; AVX512F-NEXT:    vpextrb $0, %xmm4, %eax
 ; AVX512F-NEXT:    vmovaps %zmm0, (%rsp)
 ; AVX512F-NEXT:    andl $63, %eax
 ; AVX512F-NEXT:    movzbl (%rsp,%rax), %eax
@@ -380,7 +380,7 @@
 ; AVX512F-NEXT:    vpextrb $15, %xmm4, %eax
 ; AVX512F-NEXT:    andl $63, %eax
 ; AVX512F-NEXT:    vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
-; AVX512F-NEXT:    vmovd %xmm3, %eax
+; AVX512F-NEXT:    vpextrb $0, %xmm3, %eax
 ; AVX512F-NEXT:    andl $63, %eax
 ; AVX512F-NEXT:    movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT:    vmovd %eax, %xmm4
@@ -432,7 +432,7 @@
 ; AVX512F-NEXT:    andl $63, %eax
 ; AVX512F-NEXT:    movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT:    vpinsrb $15, %eax, %xmm4, %xmm3
-; AVX512F-NEXT:    vmovd %xmm2, %eax
+; AVX512F-NEXT:    vpextrb $0, %xmm2, %eax
 ; AVX512F-NEXT:    andl $63, %eax
 ; AVX512F-NEXT:    movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT:    vmovd %eax, %xmm4
@@ -485,7 +485,7 @@
 ; AVX512F-NEXT:    andl $63, %eax
 ; AVX512F-NEXT:    movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT:    vpinsrb $15, %eax, %xmm4, %xmm2
-; AVX512F-NEXT:    vmovd %xmm1, %eax
+; AVX512F-NEXT:    vpextrb $0, %xmm1, %eax
 ; AVX512F-NEXT:    andl $63, %eax
 ; AVX512F-NEXT:    movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT:    vmovd %eax, %xmm4
@@ -555,7 +555,7 @@
 ; AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512BW-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
 ; AVX512BW-NEXT:    vextracti32x4 $3, %zmm1, %xmm4
-; AVX512BW-NEXT:    vmovd %xmm4, %eax
+; AVX512BW-NEXT:    vpextrb $0, %xmm4, %eax
 ; AVX512BW-NEXT:    vmovaps %zmm0, (%rsp)
 ; AVX512BW-NEXT:    andl $63, %eax
 ; AVX512BW-NEXT:    movzbl (%rsp,%rax), %eax
@@ -605,7 +605,7 @@
 ; AVX512BW-NEXT:    vpextrb $15, %xmm4, %eax
 ; AVX512BW-NEXT:    andl $63, %eax
 ; AVX512BW-NEXT:    vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
-; AVX512BW-NEXT:    vmovd %xmm3, %eax
+; AVX512BW-NEXT:    vpextrb $0, %xmm3, %eax
 ; AVX512BW-NEXT:    andl $63, %eax
 ; AVX512BW-NEXT:    movzbl (%rsp,%rax), %eax
 ; AVX512BW-NEXT:    vmovd %eax, %xmm4
@@ -657,7 +657,7 @@
 ; AVX512BW-NEXT:    andl $63, %eax
 ; AVX512BW-NEXT:    movzbl (%rsp,%rax), %eax
 ; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm4, %xmm3
-; AVX512BW-NEXT:    vmovd %xmm2, %eax
+; AVX512BW-NEXT:    vpextrb $0, %xmm2, %eax
 ; AVX512BW-NEXT:    andl $63, %eax
 ; AVX512BW-NEXT:    movzbl (%rsp,%rax), %eax
 ; AVX512BW-NEXT:    vmovd %eax, %xmm4
@@ -710,7 +710,7 @@
 ; AVX512BW-NEXT:    andl $63, %eax
 ; AVX512BW-NEXT:    movzbl (%rsp,%rax), %eax
 ; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm4, %xmm2
-; AVX512BW-NEXT:    vmovd %xmm1, %eax
+; AVX512BW-NEXT:    vpextrb $0, %xmm1, %eax
 ; AVX512BW-NEXT:    andl $63, %eax
 ; AVX512BW-NEXT:    movzbl (%rsp,%rax), %eax
 ; AVX512BW-NEXT:    vmovd %eax, %xmm4
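
In the two var-permute files the lane-0 reads switch from vmovd to
vpextrw $0 / vpextrb $0. Both forms read the low element (vmovd just grabs the
low 32 bits, which contain it), and the subsequent andl mask keeps only bits
that lie inside that element either way, so this is a neutral
instruction-selection shift, presumably because the element extract is now
explicitly zero-extended and matches the pextr patterns directly.
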
diff --git a/llvm/test/CodeGen/X86/vec_extract.ll b/llvm/test/CodeGen/X86/vec_extract.ll
--- a/llvm/test/CodeGen/X86/vec_extract.ll
+++ b/llvm/test/CodeGen/X86/vec_extract.ll
@@ -110,11 +110,15 @@
 ; X32-LABEL: ossfuzz15662:
 ; X32:       # %bb.0:
 ; X32-NEXT:    xorps %xmm0, %xmm0
+; X32-NEXT:    movaps %xmm0, (%eax)
+; X32-NEXT:    xorps %xmm0, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: ossfuzz15662:
 ; X64:       # %bb.0:
 ; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    movaps %xmm0, (%rax)
+; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    retq
    %C10 = icmp ule i1 false, false
    %C3 = icmp ule i1 true, undef