diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -9699,7 +9699,7 @@ The first operand of an '``extractelement``' instruction is a value of :ref:`vector ` type. The second operand is an index indicating the position from which to extract the element. The index may be a -variable of any integer type. +variable of any integer type, and will be treated as an unsigned integer. Semantics: """""""""" @@ -9744,7 +9744,8 @@ :ref:`vector ` type. The second operand is a scalar value whose type must equal the element type of the first operand. The third operand is an index indicating the position at which to insert the value. The -index may be a variable of any integer type. +index may be a variable of any integer type, and will be treated as an +unsigned integer. Semantics: """""""""" diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2813,7 +2813,7 @@ Register Idx; if (auto *CI = dyn_cast(U.getOperand(1))) { if (CI->getBitWidth() != PreferredVecIdxWidth) { - APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth); + APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth); auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx); Idx = getOrCreateVReg(*NewIdxCI); } @@ -2822,7 +2822,7 @@ Idx = getOrCreateVReg(*U.getOperand(1)); if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) { const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth); - Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx).getReg(0); + Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0); } MIRBuilder.buildExtractVectorElement(Res, Val, Idx); return true; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3582,7 +3582,7 @@ const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); - SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), + SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()), @@ -3592,7 +3592,7 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); - SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), + SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()), diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -1549,7 +1549,7 @@ ; CHECK-LABEL: name: test_extractelement ; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0 ; CHECK: [[IDX:%[0-9]+]]:_(s32) = COPY $w0 -; CHECK: [[IDXEXT:%[0-9]+]]:_(s64) = G_SEXT [[IDX]] +; CHECK: [[IDXEXT:%[0-9]+]]:_(s64) = G_ZEXT [[IDX]] ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDXEXT]](s64) ; CHECK: $w0 = COPY [[RES]](s32) %res = extractelement <2 x i32> %vec, i32 %idx @@ -1566,6 +1566,27 @@ ret i32 %res } +define i32 @test_extractelement_const_idx_zext_i1(<2 x i32> %vec) { +; CHECK-LABEL: name: test_extractelement_const_idx_zext_i1 +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0 +; CHECK: [[IDX:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 +; CHECK: 
[[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s64) +; CHECK: $w0 = COPY [[RES]](s32) + %res = extractelement <2 x i32> %vec, i1 true + ret i32 %res +} + +define i32 @test_extractelement_const_idx_zext_i8(<2 x i32> %vec) { +; CHECK-LABEL: name: test_extractelement_const_idx_zext_i8 +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0 +; CHECK: [[IDX:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s64) +; CHECK: $w0 = COPY [[RES]](s32) + %res = extractelement <2 x i32> %vec, i8 255 + ret i32 %res +} + + define i32 @test_singleelementvector(i32 %elt){ ; CHECK-LABEL: name: test_singleelementvector ; CHECK: [[ELT:%[0-9]+]]:_(s32) = COPY $w0 diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -9143,8 +9143,7 @@ define i32 @load_single_extract_variable_index_v3i32_small_align(<3 x i32>* %A, i32 %idx) { ; CHECK-LABEL: load_single_extract_variable_index_v3i32_small_align: ; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x9, w1 +; CHECK-NEXT: mov w9, w1 ; CHECK-NEXT: mov w8, #2 ; CHECK-NEXT: cmp x9, #2 ; CHECK-NEXT: csel x8, x9, x8, lo @@ -9158,8 +9157,7 @@ define i32 @load_single_extract_variable_index_v3i32_default_align(<3 x i32>* %A, i32 %idx) { ; CHECK-LABEL: load_single_extract_variable_index_v3i32_default_align: ; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x9, w1 +; CHECK-NEXT: mov w9, w1 ; CHECK-NEXT: mov w8, #2 ; CHECK-NEXT: cmp x9, #2 ; CHECK-NEXT: csel x8, x9, x8, lo diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll --- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll @@ -292,8 +292,7 @@ 
define i8 @test_lanex_16xi8( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.b, xzr, x8 ; CHECK-NEXT: lastb w0, p0, z0.b ; CHECK-NEXT: ret @@ -304,8 +303,7 @@ define i16 @test_lanex_8xi16( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_8xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.h, xzr, x8 ; CHECK-NEXT: lastb w0, p0, z0.h ; CHECK-NEXT: ret @@ -316,8 +314,7 @@ define i32 @test_lanex_4xi32( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_4xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.s, xzr, x8 ; CHECK-NEXT: lastb w0, p0, z0.s ; CHECK-NEXT: ret @@ -328,8 +325,7 @@ define i64 @test_lanex_2xi64( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.d, xzr, x8 ; CHECK-NEXT: lastb x0, p0, z0.d ; CHECK-NEXT: ret @@ -340,8 +336,7 @@ define half @test_lanex_8xf16( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_8xf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.h, xzr, x8 ; CHECK-NEXT: lastb h0, p0, z0.h ; CHECK-NEXT: ret @@ -352,8 +347,7 @@ define half @test_lanex_4xf16( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_4xf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.s, xzr, x8 ; CHECK-NEXT: lastb h0, p0, z0.h ; CHECK-NEXT: ret @@ -364,8 +358,7 @@ define half @test_lanex_2xf16( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_2xf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed 
$w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.d, xzr, x8 ; CHECK-NEXT: lastb h0, p0, z0.h ; CHECK-NEXT: ret @@ -376,8 +369,7 @@ define float @test_lanex_4xf32( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_4xf32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.s, xzr, x8 ; CHECK-NEXT: lastb s0, p0, z0.s ; CHECK-NEXT: ret @@ -388,8 +380,7 @@ define float @test_lanex_2xf32( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_2xf32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.d, xzr, x8 ; CHECK-NEXT: lastb s0, p0, z0.s ; CHECK-NEXT: ret @@ -400,8 +391,7 @@ define double @test_lanex_2xf64( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_2xf64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.d, xzr, x8 ; CHECK-NEXT: lastb d0, p0, z0.d ; CHECK-NEXT: ret @@ -518,8 +508,7 @@ define i1 @test_lanex_4xi1( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_4xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 ; CHECK-NEXT: whilels p0.s, xzr, x8 ; CHECK-NEXT: lastb w8, p0, z0.s diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll --- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -128,8 +128,7 @@ define @test_lanex_16xi8( %a, i32 %x) { ; CHECK-LABEL: test_lanex_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov w9, #30 ; CHECK-NEXT: index z2.b, #0, #1 ; CHECK-NEXT: ptrue p0.b @@ -389,8 +388,7 @@ define 
@test_predicate_insert_8xi1_immediate ( %val, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_8xi1_immediate: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov w9, #1 ; CHECK-NEXT: index z1.h, #0, #1 ; CHECK-NEXT: ptrue p1.h @@ -427,8 +425,7 @@ define @test_predicate_insert_2xi1( %val, i1 %elt, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_2xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov w8, w1 ; CHECK-NEXT: index z1.d, #0, #1 ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -446,8 +443,7 @@ define @test_predicate_insert_4xi1( %val, i1 %elt, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_4xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov w8, w1 ; CHECK-NEXT: index z1.s, #0, #1 ; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, w8 @@ -463,8 +459,7 @@ define @test_predicate_insert_8xi1( %val, i1 %elt, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_8xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov w8, w1 ; CHECK-NEXT: index z1.h, #0, #1 ; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z0.h, w8 @@ -481,8 +476,7 @@ define @test_predicate_insert_16xi1( %val, i1 %elt, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_16xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov w8, w1 ; CHECK-NEXT: index z1.b, #0, #1 ; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: mov z0.b, w8 @@ -505,8 +499,7 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x9, w1 +; CHECK-NEXT: mov w9, w1 ; 
CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 ; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1b { z0.b }, p1, [sp, #1, mul vl] diff --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll --- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll @@ -6,8 +6,7 @@ define i32 @promote_extract_2i32_idx( %a, i32 %idx) { ; CHECK-LABEL: promote_extract_2i32_idx: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.d, xzr, x8 ; CHECK-NEXT: lastb x0, p0, z0.d ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -25,8 +24,7 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1b { z0.b }, p0, [sp] @@ -51,8 +49,7 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1h { z0.h }, p0, [sp] @@ -77,8 +74,7 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: cnth x8 -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmp x9, x8 @@ -103,8 +99,7 @@ ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: 
.cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: cnth x8 -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmp x9, x8 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -march=amdgcn -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s + +define i8 @f_i1_1() { + ; CHECK-LABEL: name: f_i1_1 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<256 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x s8>), [[C]](s32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %E1 = extractelement <256 x i8> undef, i1 true + ret i8 %E1 +} + +define i8 @f_i8_255() { + ; CHECK-LABEL: name: f_i8_255 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<256 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x s8>), [[C]](s32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %E1 = extractelement <256 x i8> undef, i8 255 + ret i8 %E1 +} diff --git a/llvm/test/CodeGen/Mips/msa/basic_operations.ll b/llvm/test/CodeGen/Mips/msa/basic_operations.ll --- a/llvm/test/CodeGen/Mips/msa/basic_operations.ll +++ 
b/llvm/test/CodeGen/Mips/msa/basic_operations.ll @@ -1315,7 +1315,7 @@ ; N64-NEXT: ld.b $w0, 0($2) ; N64-NEXT: addv.b $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.b $w0, $w0[$1] ; N64-NEXT: mfc1 $1, $f0 ; N64-NEXT: sra $1, $1, 24 @@ -1371,7 +1371,7 @@ ; N64-NEXT: ld.h $w0, 0($2) ; N64-NEXT: addv.h $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.h $w0, $w0[$1] ; N64-NEXT: mfc1 $1, $f0 ; N64-NEXT: sra $1, $1, 16 @@ -1423,7 +1423,7 @@ ; N64-NEXT: ld.w $w0, 0($2) ; N64-NEXT: addv.w $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.w $w0, $w0[$1] ; N64-NEXT: jr $ra ; N64-NEXT: mfc1 $2, $f0 @@ -1495,7 +1495,7 @@ ; N64-NEXT: ld.d $w0, 0($2) ; N64-NEXT: addv.d $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.d $w0, $w0[$1] ; N64-NEXT: jr $ra ; N64-NEXT: dmfc1 $2, $f0 @@ -1546,7 +1546,7 @@ ; N64-NEXT: ld.b $w0, 0($2) ; N64-NEXT: addv.b $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.b $w0, $w0[$1] ; N64-NEXT: mfc1 $1, $f0 ; N64-NEXT: jr $ra @@ -1599,7 +1599,7 @@ ; N64-NEXT: ld.h $w0, 0($2) ; N64-NEXT: addv.h $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.h $w0, $w0[$1] ; N64-NEXT: mfc1 $1, $f0 ; N64-NEXT: jr $ra @@ -1650,7 +1650,7 @@ ; N64-NEXT: ld.w $w0, 0($2) ; N64-NEXT: addv.w $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.w $w0, $w0[$1] ; N64-NEXT: jr $ra ; N64-NEXT: mfc1 $2, $f0 @@ -1722,7 +1722,7 @@ ; N64-NEXT: ld.d $w0, 0($2) ; N64-NEXT: addv.d $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; 
N64-NEXT: splat.d $w0, $w0[$1] ; N64-NEXT: jr $ra ; N64-NEXT: dmfc1 $2, $f0 @@ -1934,7 +1934,7 @@ ; N64-NEXT: daddu $1, $1, $25 ; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8_vidx))) ; N64-NEXT: ld $2, %got_disp(i32)($1) -; N64-NEXT: lw $2, 0($2) +; N64-NEXT: lwu $2, 0($2) ; N64-NEXT: ld $1, %got_disp(v16i8)($1) ; N64-NEXT: ld.b $w0, 0($1) ; N64-NEXT: sld.b $w0, $w0[$2] @@ -1994,7 +1994,7 @@ ; N64-NEXT: daddu $1, $1, $25 ; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16_vidx))) ; N64-NEXT: ld $2, %got_disp(i32)($1) -; N64-NEXT: lw $2, 0($2) +; N64-NEXT: lwu $2, 0($2) ; N64-NEXT: ld $1, %got_disp(v8i16)($1) ; N64-NEXT: ld.h $w0, 0($1) ; N64-NEXT: dsll $2, $2, 1 @@ -2055,7 +2055,7 @@ ; N64-NEXT: daddu $1, $1, $25 ; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32_vidx))) ; N64-NEXT: ld $2, %got_disp(i32)($1) -; N64-NEXT: lw $2, 0($2) +; N64-NEXT: lwu $2, 0($2) ; N64-NEXT: ld $1, %got_disp(v4i32)($1) ; N64-NEXT: ld.w $w0, 0($1) ; N64-NEXT: dsll $2, $2, 2 @@ -2124,7 +2124,7 @@ ; N64-NEXT: daddu $1, $1, $25 ; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64_vidx))) ; N64-NEXT: ld $2, %got_disp(i32)($1) -; N64-NEXT: lw $2, 0($2) +; N64-NEXT: lwu $2, 0($2) ; N64-NEXT: ld $1, %got_disp(v2i64)($1) ; N64-NEXT: ld.d $w0, 0($1) ; N64-NEXT: dsll $2, $2, 3 diff --git a/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll b/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll --- a/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll +++ b/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll @@ -193,10 +193,9 @@ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) %4 = extractelement <4 x float> %2, i32 %3 - ; ALL-DAG: splat.w $w0, [[R1]][[[IDX]]] + ; ALL-DAG: splat.w $w0, [[R1]][[[PTR_I]]] ret float %4 } @@ -259,10 +258,9 @@ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( ; N32-DAG: lw [[PTR_I:\$[0-9]+]], 
%got_disp(i32)( ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) %4 = extractelement <2 x double> %2, i32 %3 - ; ALL-DAG: splat.d $w0, [[R1]][[[IDX]]] + ; ALL-DAG: splat.d $w0, [[R1]][[[PTR_I]]] ret double %4 } @@ -312,11 +310,10 @@ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) %3 = insertelement <4 x float> %1, float %a, i32 %2 ; float argument passed in $f12 - ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2 + ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[PTR_I]], 2 ; ALL-DAG: sld.b [[R1]], [[R1]][[[BIDX]]] ; ALL-DAG: insve.w [[R1]][0], $w12[0] ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]] @@ -341,11 +338,10 @@ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) %3 = insertelement <2 x double> %1, double %a, i32 %2 ; double argument passed in $f12 - ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 3 + ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[PTR_I]], 3 ; ALL-DAG: sld.b [[R1]], [[R1]][[[BIDX]]] ; ALL-DAG: insve.d [[R1]][0], $w12[0] ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]] diff --git a/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll --- a/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll +++ b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll @@ -715,7 +715,6 @@ define double @conv2dlbTestuiVar(<4 x i32> %a, i32 zeroext %elem) { ; CHECK-64-LABEL: conv2dlbTestuiVar: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: extsw 3, 3 ; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-64-NEXT: vextuwlx 3, 3, 2 ; CHECK-64-NEXT: mtfprwz 0, 3 diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll --- 
a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll @@ -5,6 +5,7 @@ define zeroext i8 @test1(<16 x i8> %a, i32 signext %index) { ; CHECK-64-LABEL: test1: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: vextublx 3, 3, 2 ; CHECK-64-NEXT: clrldi 3, 3, 56 ; CHECK-64-NEXT: blr @@ -24,6 +25,7 @@ define signext i8 @test2(<16 x i8> %a, i32 signext %index) { ; CHECK-64-LABEL: test2: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: vextublx 3, 3, 2 ; CHECK-64-NEXT: extsb 3, 3 ; CHECK-64-NEXT: blr @@ -44,6 +46,7 @@ define zeroext i16 @test3(<8 x i16> %a, i32 signext %index) { ; CHECK-64-LABEL: test3: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-64-NEXT: vextuhlx 3, 3, 2 ; CHECK-64-NEXT: clrldi 3, 3, 48 @@ -64,6 +67,7 @@ define signext i16 @test4(<8 x i16> %a, i32 signext %index) { ; CHECK-64-LABEL: test4: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-64-NEXT: vextuhlx 3, 3, 2 ; CHECK-64-NEXT: extsh 3, 3 @@ -84,6 +88,7 @@ define zeroext i32 @test5(<4 x i32> %a, i32 signext %index) { ; CHECK-64-LABEL: test5: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-64-NEXT: vextuwlx 3, 3, 2 ; CHECK-64-NEXT: blr @@ -103,6 +108,7 @@ define signext i32 @test6(<4 x i32> %a, i32 signext %index) { ; CHECK-64-LABEL: test6: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-64-NEXT: vextuwlx 3, 3, 2 ; CHECK-64-NEXT: extsw 3, 3 diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll --- a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll @@ -5,6 +5,7 @@ define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, 
i8 zeroext %c) { ; CHECK-64-LABEL: test_add1: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: vextublx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 ; CHECK-64-NEXT: clrldi 3, 3, 56 @@ -31,6 +32,7 @@ define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) { ; CHECK-64-LABEL: test_add2: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: vextublx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 ; CHECK-64-NEXT: extsb 3, 3 @@ -57,6 +59,7 @@ define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) { ; CHECK-64-LABEL: test_add3: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-64-NEXT: vextuhlx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 @@ -84,6 +87,7 @@ define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) { ; CHECK-64-LABEL: test_add4: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-64-NEXT: vextuhlx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 @@ -111,6 +115,7 @@ define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) { ; CHECK-64-LABEL: test_add5: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-64-NEXT: vextuwlx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 @@ -134,6 +139,7 @@ define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) { ; CHECK-64-LABEL: test_add6: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-64-NEXT: vextuwlx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll --- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll @@ -266,8 +266,8 @@ define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) { ; 
CHECK-64-LABEL: testFloat1: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-DAG: rlwinm 3, 4, 2, 28, 29 -; CHECK-64-DAG: addi 4, 1, -16 +; CHECK-64-NEXT: rlwinm 3, 4, 2, 28, 29 +; CHECK-64-NEXT: addi 4, 1, -16 ; CHECK-64-NEXT: stxv 34, -16(1) ; CHECK-64-NEXT: stfsx 1, 4, 3 ; CHECK-64-NEXT: lxv 34, -16(1) @@ -285,8 +285,7 @@ ; CHECK-64-P10-LABEL: testFloat1: ; CHECK-64-P10: # %bb.0: # %entry ; CHECK-64-P10-NEXT: xscvdpspn 35, 1 -; CHECK-64-P10-NEXT: extsw 3, 4 -; CHECK-64-P10-NEXT: slwi 3, 3, 2 +; CHECK-64-P10-NEXT: slwi 3, 4, 2 ; CHECK-64-P10-NEXT: vinswvlx 2, 3, 3 ; CHECK-64-P10-NEXT: blr ; @@ -305,16 +304,16 @@ ; CHECK-64-LABEL: testFloat2: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: lwz 6, 0(3) -; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29 -; CHECK-64-DAG: addi 7, 1, -16 +; CHECK-64-NEXT: addi 7, 1, -16 +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 ; CHECK-64-NEXT: stxv 34, -16(1) +; CHECK-64-NEXT: rlwinm 5, 5, 2, 28, 29 ; CHECK-64-NEXT: stwx 6, 7, 4 -; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 -; CHECK-64-NEXT: addi 5, 1, -32 +; CHECK-64-NEXT: addi 4, 1, -32 ; CHECK-64-NEXT: lxv 0, -16(1) ; CHECK-64-NEXT: lwz 3, 1(3) ; CHECK-64-NEXT: stxv 0, -32(1) -; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: stwx 3, 4, 5 ; CHECK-64-NEXT: lxv 34, -32(1) ; CHECK-64-NEXT: blr ; @@ -337,12 +336,10 @@ ; CHECK-64-P10-LABEL: testFloat2: ; CHECK-64-P10: # %bb.0: # %entry ; CHECK-64-P10-NEXT: lwz 6, 0(3) -; CHECK-64-P10-NEXT: extsw 4, 4 ; CHECK-64-P10-NEXT: lwz 3, 1(3) ; CHECK-64-P10-NEXT: slwi 4, 4, 2 ; CHECK-64-P10-NEXT: vinswlx 2, 4, 6 -; CHECK-64-P10-NEXT: extsw 4, 5 -; CHECK-64-P10-NEXT: slwi 4, 4, 2 +; CHECK-64-P10-NEXT: slwi 4, 5, 2 ; CHECK-64-P10-NEXT: vinswlx 2, 4, 3 ; CHECK-64-P10-NEXT: blr ; @@ -368,8 +365,9 @@ ; CHECK-64-LABEL: testFloat3: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: lis 6, 1 -; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29 -; CHECK-64-DAG: addi 7, 1, -16 +; CHECK-64-NEXT: addi 7, 1, -16 +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-64-NEXT: rlwinm 5, 5, 2, 28, 29 ; 
CHECK-64-NEXT: lwzx 6, 3, 6 ; CHECK-64-NEXT: stxv 34, -16(1) ; CHECK-64-NEXT: stwx 6, 7, 4 @@ -377,10 +375,9 @@ ; CHECK-64-NEXT: lxv 0, -16(1) ; CHECK-64-NEXT: rldic 4, 4, 36, 27 ; CHECK-64-NEXT: lwzx 3, 3, 4 -; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 -; CHECK-64-NEXT: addi 5, 1, -32 +; CHECK-64-NEXT: addi 4, 1, -32 ; CHECK-64-NEXT: stxv 0, -32(1) -; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: stwx 3, 4, 5 ; CHECK-64-NEXT: lxv 34, -32(1) ; CHECK-64-NEXT: blr ; @@ -404,14 +401,12 @@ ; CHECK-64-P10-LABEL: testFloat3: ; CHECK-64-P10: # %bb.0: # %entry ; CHECK-64-P10-NEXT: plwz 6, 65536(3), 0 -; CHECK-64-P10-NEXT: extsw 4, 4 ; CHECK-64-P10-NEXT: slwi 4, 4, 2 ; CHECK-64-P10-NEXT: vinswlx 2, 4, 6 ; CHECK-64-P10-NEXT: li 4, 1 ; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27 ; CHECK-64-P10-NEXT: lwzx 3, 3, 4 -; CHECK-64-P10-NEXT: extsw 4, 5 -; CHECK-64-P10-NEXT: slwi 4, 4, 2 +; CHECK-64-P10-NEXT: slwi 4, 5, 2 ; CHECK-64-P10-NEXT: vinswlx 2, 4, 3 ; CHECK-64-P10-NEXT: blr ; @@ -572,7 +567,7 @@ define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) { ; CHECK-64-LABEL: testDouble1: ; CHECK-64: # %bb.0: # %entry -; CHECK-64: rlwinm 3, 4, 3, 28, 28 +; CHECK-64-NEXT: rlwinm 3, 4, 3, 28, 28 ; CHECK-64-NEXT: addi 4, 1, -16 ; CHECK-64-NEXT: stxv 34, -16(1) ; CHECK-64-NEXT: stfdx 1, 4, 3 @@ -590,7 +585,6 @@ ; ; CHECK-64-P10-LABEL: testDouble1: ; CHECK-64-P10: # %bb.0: # %entry -; CHECK-64-P10-NEXT: extsw 4, 4 ; CHECK-64-P10-NEXT: mffprd 3, 1 ; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 ; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3 @@ -598,8 +592,8 @@ ; ; CHECK-32-P10-LABEL: testDouble1: ; CHECK-32-P10: # %bb.0: # %entry -; CHECK-32-P10-DAG: addi 4, 1, -16 -; CHECK-32-P10-DAG: rlwinm 3, 5, 3, 28, 28 +; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28 +; CHECK-32-P10-NEXT: addi 4, 1, -16 ; CHECK-32-P10-NEXT: stxv 34, -16(1) ; CHECK-32-P10-NEXT: stfdx 1, 4, 3 ; CHECK-32-P10-NEXT: lxv 34, -16(1) @@ -613,17 +607,17 @@ ; CHECK-64-LABEL: testDouble2: ; CHECK-64: # %bb.0: # %entry ; 
CHECK-64-NEXT: ld 6, 0(3) -; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28 -; CHECK-64-DAG: addi 7, 1, -32 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 ; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-64-NEXT: stdx 6, 7, 4 ; CHECK-64-NEXT: li 4, 1 ; CHECK-64-NEXT: lxv 0, -32(1) ; CHECK-64-NEXT: ldx 3, 3, 4 -; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28 -; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: addi 4, 1, -16 ; CHECK-64-NEXT: stxv 0, -16(1) -; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: stdx 3, 4, 5 ; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; @@ -646,20 +640,18 @@ ; CHECK-64-P10-LABEL: testDouble2: ; CHECK-64-P10: # %bb.0: # %entry ; CHECK-64-P10-NEXT: ld 6, 0(3) -; CHECK-64-P10-NEXT: extsw 4, 4 ; CHECK-64-P10-NEXT: pld 3, 1(3), 0 ; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 ; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6 -; CHECK-64-P10-NEXT: extsw 4, 5 -; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: rlwinm 4, 5, 3, 0, 28 ; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3 ; CHECK-64-P10-NEXT: blr ; ; CHECK-32-P10-LABEL: testDouble2: ; CHECK-32-P10: # %bb.0: # %entry ; CHECK-32-P10-NEXT: lfd 0, 0(3) -; CHECK-32-P10-DAG: addi 6, 1, -32 -; CHECK-32-P10-DAG: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-P10-NEXT: addi 6, 1, -32 ; CHECK-32-P10-NEXT: stxv 34, -32(1) ; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: stfdx 0, 6, 4 @@ -683,8 +675,9 @@ ; CHECK-64-LABEL: testDouble3: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: lis 6, 1 -; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28 -; CHECK-64-DAG: addi 7, 1, -32 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-64-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-64-NEXT: ldx 6, 3, 6 ; CHECK-64-NEXT: stxv 34, -32(1) ; CHECK-64-NEXT: stdx 6, 7, 4 @@ -692,10 +685,9 @@ ; CHECK-64-NEXT: lxv 0, -32(1) ; CHECK-64-NEXT: rldic 4, 4, 36, 27 ; CHECK-64-NEXT: ldx 3, 3, 4 -; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28 
-; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: addi 4, 1, -16 ; CHECK-64-NEXT: stxv 0, -16(1) -; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: stdx 3, 4, 5 ; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; @@ -719,22 +711,20 @@ ; CHECK-64-P10-LABEL: testDouble3: ; CHECK-64-P10: # %bb.0: # %entry ; CHECK-64-P10-NEXT: pld 6, 65536(3), 0 -; CHECK-64-P10-NEXT: extsw 4, 4 ; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 ; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6 ; CHECK-64-P10-NEXT: li 4, 1 ; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27 ; CHECK-64-P10-NEXT: ldx 3, 3, 4 -; CHECK-64-P10-NEXT: extsw 4, 5 -; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: rlwinm 4, 5, 3, 0, 28 ; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3 ; CHECK-64-P10-NEXT: blr ; ; CHECK-32-P10-LABEL: testDouble3: ; CHECK-32-P10: # %bb.0: # %entry ; CHECK-32-P10-NEXT: plfd 0, 65536(3), 0 -; CHECK-32-P10-DAG: addi 6, 1, -32 -; CHECK-32-P10-DAG: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-P10-NEXT: addi 6, 1, -32 ; CHECK-32-P10-NEXT: stxv 34, -32(1) ; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: stfdx 0, 6, 4 diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll --- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -1112,10 +1112,11 @@ define signext i8 @getvelsc(<16 x i8> %vsc, i32 signext %i) { ; CHECK-LABEL: getvelsc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r4, r5, 8 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 7 -; CHECK-NEXT: lvsl v3, 0, r4 -; CHECK-NEXT: andc r3, r3, r5 +; CHECK-NEXT: andi. 
r5, r4, 8 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 @@ -1126,10 +1127,11 @@ ; CHECK-LE-LABEL: getvelsc: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 8 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 7 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 3 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -1139,10 +1141,11 @@ ; ; CHECK-AIX-LABEL: getvelsc: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 8 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 7 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: andi. 5, 3, 8 ; CHECK-AIX-NEXT: andc 3, 4, 3 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 @@ -1160,10 +1163,11 @@ define zeroext i8 @getveluc(<16 x i8> %vuc, i32 signext %i) { ; CHECK-LABEL: getveluc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r4, r5, 8 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 7 -; CHECK-NEXT: lvsl v3, 0, r4 -; CHECK-NEXT: andc r3, r3, r5 +; CHECK-NEXT: andi. 
r5, r4, 8 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 @@ -1174,10 +1178,11 @@ ; CHECK-LE-LABEL: getveluc: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 8 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 7 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 3 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -1187,10 +1192,11 @@ ; ; CHECK-AIX-LABEL: getveluc: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 8 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 7 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: andi. 5, 3, 8 ; CHECK-AIX-NEXT: andc 3, 4, 3 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 @@ -1672,12 +1678,13 @@ define signext i16 @getvelss(<8 x i16> %vss, i32 signext %i) { ; CHECK-LABEL: getvelss: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r4, r5, 4 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 3 -; CHECK-NEXT: sldi r4, r4, 1 -; CHECK-NEXT: andc r3, r3, r5 -; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: andi. 
r5, r4, 4 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: sldi r5, r5, 1 ; CHECK-NEXT: sldi r3, r3, 4 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 ; CHECK-NEXT: srd r3, r4, r3 @@ -1687,11 +1694,12 @@ ; CHECK-LE-LABEL: getvelss: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 4 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 1 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 3 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 4 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -1701,12 +1709,13 @@ ; ; CHECK-AIX-LABEL: getvelss: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 4 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 3 -; CHECK-AIX-NEXT: sldi 5, 5, 1 +; CHECK-AIX-NEXT: andi. 5, 3, 4 ; CHECK-AIX-NEXT: andc 3, 4, 3 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: sldi 5, 5, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 4 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 ; CHECK-AIX-NEXT: srd 3, 4, 3 @@ -1723,12 +1732,13 @@ define zeroext i16 @getvelus(<8 x i16> %vus, i32 signext %i) { ; CHECK-LABEL: getvelus: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r4, r5, 4 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 3 -; CHECK-NEXT: sldi r4, r4, 1 -; CHECK-NEXT: andc r3, r3, r5 -; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: andi. 
r5, r4, 4 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: sldi r5, r5, 1 ; CHECK-NEXT: sldi r3, r3, 4 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 ; CHECK-NEXT: srd r3, r4, r3 @@ -1738,11 +1748,12 @@ ; CHECK-LE-LABEL: getvelus: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 4 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 1 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 3 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 4 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -1752,12 +1763,13 @@ ; ; CHECK-AIX-LABEL: getvelus: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 4 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 3 -; CHECK-AIX-NEXT: sldi 5, 5, 1 +; CHECK-AIX-NEXT: andi. 5, 3, 4 ; CHECK-AIX-NEXT: andc 3, 4, 3 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: sldi 5, 5, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 4 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 ; CHECK-AIX-NEXT: srd 3, 4, 3 @@ -1988,12 +2000,13 @@ define signext i32 @getvelsi(<4 x i32> %vsi, i32 signext %i) { ; CHECK-LABEL: getvelsi: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r4, r5, 2 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 1 -; CHECK-NEXT: sldi r4, r4, 2 -; CHECK-NEXT: andc r3, r3, r5 -; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: andi. 
r5, r4, 2 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: sldi r5, r5, 2 ; CHECK-NEXT: sldi r3, r3, 5 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 ; CHECK-NEXT: srd r3, r4, r3 @@ -2003,11 +2016,12 @@ ; CHECK-LE-LABEL: getvelsi: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 2 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 2 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 1 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 5 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -2017,12 +2031,13 @@ ; ; CHECK-AIX-LABEL: getvelsi: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 2 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 1 -; CHECK-AIX-NEXT: sldi 5, 5, 2 +; CHECK-AIX-NEXT: andi. 5, 3, 2 ; CHECK-AIX-NEXT: andc 3, 4, 3 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: sldi 5, 5, 2 ; CHECK-AIX-NEXT: sldi 3, 3, 5 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 ; CHECK-AIX-NEXT: srd 3, 4, 3 @@ -2038,12 +2053,13 @@ define zeroext i32 @getvelui(<4 x i32> %vui, i32 signext %i) { ; CHECK-LABEL: getvelui: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r4, r5, 2 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 1 -; CHECK-NEXT: sldi r4, r4, 2 -; CHECK-NEXT: andc r3, r3, r5 -; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: andi. 
r5, r4, 2 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: sldi r5, r5, 2 ; CHECK-NEXT: sldi r3, r3, 5 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 ; CHECK-NEXT: srd r3, r4, r3 @@ -2053,11 +2069,12 @@ ; CHECK-LE-LABEL: getvelui: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 2 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 2 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 1 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 5 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -2067,12 +2084,13 @@ ; ; CHECK-AIX-LABEL: getvelui: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 2 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 1 -; CHECK-AIX-NEXT: sldi 5, 5, 2 +; CHECK-AIX-NEXT: andi. 5, 3, 2 ; CHECK-AIX-NEXT: andc 3, 4, 3 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: sldi 5, 5, 2 ; CHECK-AIX-NEXT: sldi 3, 3, 5 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 ; CHECK-AIX-NEXT: srd 3, 4, 3 @@ -2186,7 +2204,8 @@ define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) { ; CHECK-LABEL: getvelsl: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r3, r5, 1 +; CHECK-NEXT: clrldi r3, r5, 32 +; CHECK-NEXT: andi. r3, r3, 1 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: lvsl v3, 0, r3 ; CHECK-NEXT: vperm v2, v2, v2, v3 @@ -2196,7 +2215,8 @@ ; CHECK-LE-LABEL: getvelsl: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 1 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 3 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 @@ -2205,6 +2225,7 @@ ; ; CHECK-AIX-LABEL: getvelsl: ; CHECK-AIX: # %bb.0: # %entry +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: andi. 
3, 3, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: lvsl 3, 0, 3 @@ -2221,7 +2242,8 @@ define i64 @getvelul(<2 x i64> %vul, i32 signext %i) { ; CHECK-LABEL: getvelul: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r3, r5, 1 +; CHECK-NEXT: clrldi r3, r5, 32 +; CHECK-NEXT: andi. r3, r3, 1 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: lvsl v3, 0, r3 ; CHECK-NEXT: vperm v2, v2, v2, v3 @@ -2231,7 +2253,8 @@ ; CHECK-LE-LABEL: getvelul: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 1 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 3 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 @@ -2240,6 +2263,7 @@ ; ; CHECK-AIX-LABEL: getvelul: ; CHECK-AIX: # %bb.0: # %entry +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: andi. 3, 3, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: lvsl 3, 0, 3 @@ -2357,7 +2381,7 @@ define float @getvelf(<4 x float> %vf, i32 signext %i) { ; CHECK-LABEL: getvelf: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r5, 2 +; CHECK-NEXT: rldic r3, r5, 2, 30 ; CHECK-NEXT: lvsl v3, 0, r3 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: xscvspdpn f1, v2 @@ -2365,7 +2389,8 @@ ; ; CHECK-LE-LABEL: getvelf: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xori r3, r5, 3 +; CHECK-LE-NEXT: clrldi r3, r5, 32 +; CHECK-LE-NEXT: xori r3, r3, 3 ; CHECK-LE-NEXT: sldi r3, r3, 2 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 @@ -2374,7 +2399,7 @@ ; ; CHECK-AIX-LABEL: getvelf: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: sldi 3, 3, 2 +; CHECK-AIX-NEXT: rldic 3, 3, 2, 30 ; CHECK-AIX-NEXT: lvsl 3, 0, 3 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: xscvspdpn 1, 34 @@ -2436,7 +2461,8 @@ define double @getveld(<2 x double> %vd, i32 signext %i) { ; CHECK-LABEL: getveld: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r3, r5, 1 +; CHECK-NEXT: clrldi r3, r5, 32 +; CHECK-NEXT: andi. 
r3, r3, 1 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: lvsl v3, 0, r3 ; CHECK-NEXT: vperm v2, v2, v2, v3 @@ -2447,7 +2473,8 @@ ; CHECK-LE-LABEL: getveld: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 1 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 3 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 @@ -2457,6 +2484,7 @@ ; ; CHECK-AIX-LABEL: getveld: ; CHECK-AIX: # %bb.0: # %entry +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: andi. 3, 3, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: lvsl 3, 0, 3 diff --git a/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll b/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll --- a/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll +++ b/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll @@ -6,7 +6,7 @@ ; RUN: --check-prefix=CHECK-P7 ; Function Attrs: norecurse nounwind readnone -define signext i32 @geti(<4 x i32> %a, i32 signext %b) { +define zeroext i32 @geti(<4 x i32> %a, i32 zeroext %b) { ; CHECK-LABEL: geti: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li 3, 2 @@ -19,7 +19,7 @@ ; CHECK-NEXT: sldi 3, 3, 5 ; CHECK-NEXT: mfvsrd 4, 34 ; CHECK-NEXT: srd 3, 4, 3 -; CHECK-NEXT: extsw 3, 3 +; CHECK-NEXT: clrldi 3, 3, 32 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: geti: @@ -33,7 +33,7 @@ ; CHECK-BE-NEXT: vperm 2, 2, 2, 3 ; CHECK-BE-NEXT: mfvsrd 4, 34 ; CHECK-BE-NEXT: srd 3, 4, 3 -; CHECK-BE-NEXT: extsw 3, 3 +; CHECK-BE-NEXT: clrldi 3, 3, 32 ; CHECK-BE-NEXT: blr ; ; CHECK-P7-LABEL: geti: @@ -41,7 +41,7 @@ ; CHECK-P7-NEXT: addi 3, 1, -16 ; CHECK-P7-NEXT: rlwinm 4, 5, 2, 28, 29 ; CHECK-P7-NEXT: stxvw4x 34, 0, 3 -; CHECK-P7-NEXT: lwax 3, 3, 4 +; CHECK-P7-NEXT: lwzx 3, 3, 4 ; CHECK-P7-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 %b @@ -49,7 +49,7 @@ } ; Function Attrs: norecurse nounwind readnone -define i64 @getl(<2 x i64> %a, i32 signext %b) { +define i64 @getl(<2 x i64> %a, i32 zeroext %b) 
{ ; CHECK-LABEL: getl: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li 3, 1 @@ -82,7 +82,7 @@ } ; Function Attrs: norecurse nounwind readnone -define float @getf(<4 x float> %a, i32 signext %b) { +define float @getf(<4 x float> %a, i32 zeroext %b) { ; CHECK-LABEL: getf: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xori 3, 5, 3 @@ -113,7 +113,7 @@ } ; Function Attrs: norecurse nounwind readnone -define double @getd(<2 x double> %a, i32 signext %b) { +define double @getd(<2 x double> %a, i32 zeroext %b) { ; CHECK-LABEL: getd: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li 3, 1 diff --git a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll --- a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll +++ b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll @@ -5,13 +5,15 @@ define zeroext i8 @test1(<16 x i8> %a, i32 signext %index) { ; CHECK-LE-LABEL: test1: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: vextubrx 3, 5, 2 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: vextubrx 3, 3, 2 ; CHECK-LE-NEXT: clrldi 3, 3, 56 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vextublx 3, 5, 2 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: vextublx 3, 3, 2 ; CHECK-BE-NEXT: clrldi 3, 3, 56 ; CHECK-BE-NEXT: blr @@ -23,13 +25,15 @@ define signext i8 @test2(<16 x i8> %a, i32 signext %index) { ; CHECK-LE-LABEL: test2: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: vextubrx 3, 5, 2 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: vextubrx 3, 3, 2 ; CHECK-LE-NEXT: extsb 3, 3 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vextublx 3, 5, 2 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: vextublx 3, 3, 2 ; CHECK-BE-NEXT: extsb 3, 3 ; CHECK-BE-NEXT: blr @@ -41,14 +45,16 @@ define zeroext i16 @test3(<8 x i16> %a, i32 signext %index) { ; CHECK-LE-LABEL: test3: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; 
CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-LE-NEXT: vextuhrx 3, 3, 2 ; CHECK-LE-NEXT: clrldi 3, 3, 48 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-BE-NEXT: vextuhlx 3, 3, 2 ; CHECK-BE-NEXT: clrldi 3, 3, 48 ; CHECK-BE-NEXT: blr @@ -61,14 +67,16 @@ define signext i16 @test4(<8 x i16> %a, i32 signext %index) { ; CHECK-LE-LABEL: test4: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-LE-NEXT: vextuhrx 3, 3, 2 ; CHECK-LE-NEXT: extsh 3, 3 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test4: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-BE-NEXT: vextuhlx 3, 3, 2 ; CHECK-BE-NEXT: extsh 3, 3 ; CHECK-BE-NEXT: blr @@ -81,13 +89,15 @@ define zeroext i32 @test5(<4 x i32> %a, i32 signext %index) { ; CHECK-LE-LABEL: test5: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-LE-NEXT: vextuwrx 3, 3, 2 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test5: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-BE-NEXT: vextuwlx 3, 3, 2 ; CHECK-BE-NEXT: blr @@ -99,14 +109,16 @@ define signext i32 @test6(<4 x i32> %a, i32 signext %index) { ; CHECK-LE-LABEL: test6: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-LE-NEXT: vextuwrx 3, 3, 2 ; CHECK-LE-NEXT: extsw 3, 3 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test6: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; 
CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-BE-NEXT: vextuwlx 3, 3, 2 ; CHECK-BE-NEXT: extsw 3, 3 ; CHECK-BE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vec_extract_p9_2.ll b/llvm/test/CodeGen/PowerPC/vec_extract_p9_2.ll --- a/llvm/test/CodeGen/PowerPC/vec_extract_p9_2.ll +++ b/llvm/test/CodeGen/PowerPC/vec_extract_p9_2.ll @@ -5,14 +5,16 @@ define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, i8 zeroext %c) { ; CHECK-LE-LABEL: test_add1: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: vextubrx 3, 5, 2 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: vextubrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: clrldi 3, 3, 56 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test_add1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vextublx 3, 5, 2 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: vextublx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: clrldi 3, 3, 56 ; CHECK-BE-NEXT: blr @@ -28,14 +30,16 @@ define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) { ; CHECK-LE-LABEL: test_add2: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: vextubrx 3, 5, 2 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: vextubrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: extsb 3, 3 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test_add2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vextublx 3, 5, 2 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: vextublx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: extsb 3, 3 ; CHECK-BE-NEXT: blr @@ -51,7 +55,8 @@ define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) { ; CHECK-LE-LABEL: test_add3: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-LE-NEXT: vextuhrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: clrldi 3, 3, 48 @@ -59,7 +64,8 @@ ; ; CHECK-BE-LABEL: test_add3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30 
+; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-BE-NEXT: vextuhlx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: clrldi 3, 3, 48 @@ -76,7 +82,8 @@ define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) { ; CHECK-LE-LABEL: test_add4: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-LE-NEXT: vextuhrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: extsh 3, 3 @@ -84,7 +91,8 @@ ; ; CHECK-BE-LABEL: test_add4: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-BE-NEXT: vextuhlx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: extsh 3, 3 @@ -101,7 +109,8 @@ define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) { ; CHECK-LE-LABEL: test_add5: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-LE-NEXT: vextuwrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: clrldi 3, 3, 32 @@ -109,7 +118,8 @@ ; ; CHECK-BE-LABEL: test_add5: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-BE-NEXT: vextuwlx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: clrldi 3, 3, 32 @@ -123,7 +133,8 @@ define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) { ; CHECK-LE-LABEL: test_add6: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-LE-NEXT: vextuwrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: extsw 3, 3 @@ -131,7 +142,8 @@ ; ; CHECK-BE-LABEL: test_add6: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-BE-NEXT: 
clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-BE-NEXT: vextuwlx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: extsw 3, 3 diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll --- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll @@ -353,16 +353,14 @@ ; CHECK-LABEL: testFloat1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xscvdpspn v3, f1 -; CHECK-NEXT: extsw r3, r6 -; CHECK-NEXT: slwi r3, r3, 2 +; CHECK-NEXT: slwi r3, r6, 2 ; CHECK-NEXT: vinswvrx v2, r3, v3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testFloat1: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xscvdpspn v3, f1 -; CHECK-BE-NEXT: extsw r3, r6 -; CHECK-BE-NEXT: slwi r3, r3, 2 +; CHECK-BE-NEXT: slwi r3, r6, 2 ; CHECK-BE-NEXT: vinswvlx v2, r3, v3 ; CHECK-BE-NEXT: blr ; @@ -392,74 +390,54 @@ ; CHECK-LABEL: testFloat2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lwz r3, 0(r5) -; CHECK-NEXT: extsw r4, r6 -; CHECK-NEXT: slwi r4, r4, 2 +; CHECK-NEXT: slwi r4, r6, 2 ; CHECK-NEXT: vinswrx v2, r4, r3 ; CHECK-NEXT: lwz r3, 1(r5) -; CHECK-NEXT: extsw r4, r7 -; CHECK-NEXT: slwi r4, r4, 2 +; CHECK-NEXT: slwi r4, r7, 2 ; CHECK-NEXT: vinswrx v2, r4, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testFloat2: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lwz r3, 0(r5) -; CHECK-BE-NEXT: extsw r4, r6 -; CHECK-BE-NEXT: slwi r4, r4, 2 +; CHECK-BE-NEXT: slwi r4, r6, 2 ; CHECK-BE-NEXT: vinswlx v2, r4, r3 ; CHECK-BE-NEXT: lwz r3, 1(r5) -; CHECK-BE-NEXT: extsw r4, r7 -; CHECK-BE-NEXT: slwi r4, r4, 2 +; CHECK-BE-NEXT: slwi r4, r7, 2 ; CHECK-BE-NEXT: vinswlx v2, r4, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testFloat2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lwz r3, 0(r5) ; CHECK-P9-NEXT: rlwinm r4, r6, 2, 28, 29 -; CHECK-P9-NEXT: addi r6, r1, -16 +; CHECK-P9-NEXT: lwz r6, 0(r5) +; CHECK-P9-NEXT: rlwinm r3, r7, 2, 28, 29 +; CHECK-P9-NEXT: addi r7, r1, -16 ; CHECK-P9-NEXT: stxv v2, -16(r1) -; CHECK-P9-NEXT: stwx r3, r6, 
r4 -; CHECK-P9-NEXT: rlwinm r4, r7, 2, 28, 29 +; CHECK-P9-NEXT: stwx r6, r7, r4 ; CHECK-P9-NEXT: lxv vs0, -16(r1) -; CHECK-P9-NEXT: lwz r3, 1(r5) +; CHECK-P9-NEXT: lwz r4, 1(r5) ; CHECK-P9-NEXT: addi r5, r1, -32 ; CHECK-P9-NEXT: stxv vs0, -32(r1) -; CHECK-P9-NEXT: stwx r3, r5, r4 +; CHECK-P9-NEXT: stwx r4, r5, r3 ; CHECK-P9-NEXT: lxv v2, -32(r1) ; CHECK-P9-NEXT: blr ; -; AIX-P8-64-LABEL: testFloat2: -; AIX-P8-64: # %bb.0: # %entry -; AIX-P8-64-NEXT: lwz r7, 0(r3) -; AIX-P8-64-NEXT: addi r6, r1, -32 -; AIX-P8-64-NEXT: rlwinm r4, r4, 2, 28, 29 -; AIX-P8-64-NEXT: rlwinm r5, r5, 2, 28, 29 -; AIX-P8-64-NEXT: stxvw4x v2, 0, r6 -; AIX-P8-64-NEXT: stwx r7, r6, r4 -; AIX-P8-64-NEXT: addi r4, r1, -16 -; AIX-P8-64-NEXT: lxvw4x vs0, 0, r6 -; AIX-P8-64-NEXT: lwz r3, 1(r3) -; AIX-P8-64-NEXT: stxvw4x vs0, 0, r4 -; AIX-P8-64-NEXT: stwx r3, r4, r5 -; AIX-P8-64-NEXT: lxvw4x v2, 0, r4 -; AIX-P8-64-NEXT: blr -; -; AIX-P8-32-LABEL: testFloat2: -; AIX-P8-32: # %bb.0: # %entry -; AIX-P8-32-NEXT: lwz r7, 0(r3) -; AIX-P8-32-NEXT: addi r6, r1, -32 -; AIX-P8-32-NEXT: rlwinm r4, r4, 2, 28, 29 -; AIX-P8-32-NEXT: stxvw4x v2, 0, r6 -; AIX-P8-32-NEXT: stwx r7, r6, r4 -; AIX-P8-32-NEXT: rlwinm r4, r5, 2, 28, 29 -; AIX-P8-32-NEXT: addi r5, r1, -16 -; AIX-P8-32-NEXT: lxvw4x vs0, 0, r6 -; AIX-P8-32-NEXT: lwz r3, 1(r3) -; AIX-P8-32-NEXT: stxvw4x vs0, 0, r5 -; AIX-P8-32-NEXT: stwx r3, r5, r4 -; AIX-P8-32-NEXT: lxvw4x v2, 0, r5 -; AIX-P8-32-NEXT: blr +; AIX-P8-LABEL: testFloat2: +; AIX-P8: # %bb.0: # %entry +; AIX-P8-NEXT: lwz r7, 0(r3) +; AIX-P8-NEXT: addi r6, r1, -32 +; AIX-P8-NEXT: rlwinm r4, r4, 2, 28, 29 +; AIX-P8-NEXT: stxvw4x v2, 0, r6 +; AIX-P8-NEXT: stwx r7, r6, r4 +; AIX-P8-NEXT: rlwinm r4, r5, 2, 28, 29 +; AIX-P8-NEXT: addi r5, r1, -16 +; AIX-P8-NEXT: lxvw4x vs0, 0, r6 +; AIX-P8-NEXT: lwz r3, 1(r3) +; AIX-P8-NEXT: stxvw4x vs0, 0, r5 +; AIX-P8-NEXT: stwx r3, r5, r4 +; AIX-P8-NEXT: lxvw4x v2, 0, r5 +; AIX-P8-NEXT: blr entry: %add.ptr1 = getelementptr inbounds i8, ptr %b, i64 1 %0 = load float, 
ptr %b, align 4 @@ -473,13 +451,11 @@ ; CHECK-LABEL: testFloat3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: plwz r3, 65536(r5), 0 -; CHECK-NEXT: extsw r4, r6 -; CHECK-NEXT: slwi r4, r4, 2 +; CHECK-NEXT: slwi r4, r6, 2 ; CHECK-NEXT: vinswrx v2, r4, r3 ; CHECK-NEXT: li r3, 1 -; CHECK-NEXT: extsw r4, r7 +; CHECK-NEXT: slwi r4, r7, 2 ; CHECK-NEXT: rldic r3, r3, 36, 27 -; CHECK-NEXT: slwi r4, r4, 2 ; CHECK-NEXT: lwzx r3, r5, r3 ; CHECK-NEXT: vinswrx v2, r4, r3 ; CHECK-NEXT: blr @@ -487,42 +463,39 @@ ; CHECK-BE-LABEL: testFloat3: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: plwz r3, 65536(r5), 0 -; CHECK-BE-NEXT: extsw r4, r6 -; CHECK-BE-NEXT: slwi r4, r4, 2 +; CHECK-BE-NEXT: slwi r4, r6, 2 ; CHECK-BE-NEXT: vinswlx v2, r4, r3 ; CHECK-BE-NEXT: li r3, 1 -; CHECK-BE-NEXT: extsw r4, r7 +; CHECK-BE-NEXT: slwi r4, r7, 2 ; CHECK-BE-NEXT: rldic r3, r3, 36, 27 -; CHECK-BE-NEXT: slwi r4, r4, 2 ; CHECK-BE-NEXT: lwzx r3, r5, r3 ; CHECK-BE-NEXT: vinswlx v2, r4, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testFloat3: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lis r3, 1 ; CHECK-P9-NEXT: rlwinm r4, r6, 2, 28, 29 -; CHECK-P9-NEXT: addi r6, r1, -16 -; CHECK-P9-NEXT: lwzx r3, r5, r3 +; CHECK-P9-NEXT: lis r6, 1 +; CHECK-P9-NEXT: rlwinm r3, r7, 2, 28, 29 +; CHECK-P9-NEXT: addi r7, r1, -16 +; CHECK-P9-NEXT: lwzx r6, r5, r6 ; CHECK-P9-NEXT: stxv v2, -16(r1) -; CHECK-P9-NEXT: stwx r3, r6, r4 -; CHECK-P9-NEXT: li r3, 1 -; CHECK-P9-NEXT: rlwinm r4, r7, 2, 28, 29 +; CHECK-P9-NEXT: stwx r6, r7, r4 +; CHECK-P9-NEXT: li r4, 1 ; CHECK-P9-NEXT: lxv vs0, -16(r1) -; CHECK-P9-NEXT: rldic r3, r3, 36, 27 -; CHECK-P9-NEXT: lwzx r3, r5, r3 +; CHECK-P9-NEXT: rldic r4, r4, 36, 27 +; CHECK-P9-NEXT: lwzx r4, r5, r4 ; CHECK-P9-NEXT: addi r5, r1, -32 ; CHECK-P9-NEXT: stxv vs0, -32(r1) -; CHECK-P9-NEXT: stwx r3, r5, r4 +; CHECK-P9-NEXT: stwx r4, r5, r3 ; CHECK-P9-NEXT: lxv v2, -32(r1) ; CHECK-P9-NEXT: blr ; ; AIX-P8-64-LABEL: testFloat3: ; AIX-P8-64: # %bb.0: # %entry ; AIX-P8-64-NEXT: lis r6, 1 -; 
AIX-P8-64-NEXT: addi r7, r1, -32 ; AIX-P8-64-NEXT: rlwinm r4, r4, 2, 28, 29 -; AIX-P8-64-NEXT: rlwinm r5, r5, 2, 28, 29 +; AIX-P8-64-NEXT: addi r7, r1, -32 ; AIX-P8-64-NEXT: lwzx r6, r3, r6 ; AIX-P8-64-NEXT: stxvw4x v2, 0, r7 ; AIX-P8-64-NEXT: stwx r6, r7, r4 @@ -530,10 +503,11 @@ ; AIX-P8-64-NEXT: lxvw4x vs0, 0, r7 ; AIX-P8-64-NEXT: rldic r4, r4, 36, 27 ; AIX-P8-64-NEXT: lwzx r3, r3, r4 -; AIX-P8-64-NEXT: addi r4, r1, -16 -; AIX-P8-64-NEXT: stxvw4x vs0, 0, r4 -; AIX-P8-64-NEXT: stwx r3, r4, r5 -; AIX-P8-64-NEXT: lxvw4x v2, 0, r4 +; AIX-P8-64-NEXT: rlwinm r4, r5, 2, 28, 29 +; AIX-P8-64-NEXT: addi r5, r1, -16 +; AIX-P8-64-NEXT: stxvw4x vs0, 0, r5 +; AIX-P8-64-NEXT: stwx r3, r5, r4 +; AIX-P8-64-NEXT: lxvw4x v2, 0, r5 ; AIX-P8-64-NEXT: blr ; ; AIX-P8-32-LABEL: testFloat3: @@ -750,17 +724,15 @@ define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) { ; CHECK-LABEL: testDouble1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: extsw r4, r6 ; CHECK-NEXT: mffprd r3, f1 -; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-NEXT: rlwinm r4, r6, 3, 0, 28 ; CHECK-NEXT: vinsdrx v2, r4, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testDouble1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: extsw r4, r6 ; CHECK-BE-NEXT: mffprd r3, f1 -; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-BE-NEXT: rlwinm r4, r6, 3, 0, 28 ; CHECK-BE-NEXT: vinsdlx v2, r4, r3 ; CHECK-BE-NEXT: blr ; @@ -799,41 +771,37 @@ ; CHECK-LABEL: testDouble2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: ld r3, 0(r5) -; CHECK-NEXT: extsw r4, r6 -; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-NEXT: rlwinm r4, r6, 3, 0, 28 ; CHECK-NEXT: vinsdrx v2, r4, r3 ; CHECK-NEXT: pld r3, 1(r5), 0 -; CHECK-NEXT: extsw r4, r7 -; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-NEXT: rlwinm r4, r7, 3, 0, 28 ; CHECK-NEXT: vinsdrx v2, r4, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testDouble2: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: ld r3, 0(r5) -; CHECK-BE-NEXT: extsw r4, r6 -; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28 +; 
CHECK-BE-NEXT: rlwinm r4, r6, 3, 0, 28 ; CHECK-BE-NEXT: vinsdlx v2, r4, r3 ; CHECK-BE-NEXT: pld r3, 1(r5), 0 -; CHECK-BE-NEXT: extsw r4, r7 -; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-BE-NEXT: rlwinm r4, r7, 3, 0, 28 ; CHECK-BE-NEXT: vinsdlx v2, r4, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testDouble2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: ld r3, 0(r5) ; CHECK-P9-NEXT: rlwinm r4, r6, 3, 28, 28 -; CHECK-P9-NEXT: addi r6, r1, -32 +; CHECK-P9-NEXT: ld r6, 0(r5) +; CHECK-P9-NEXT: rlwinm r3, r7, 3, 28, 28 +; CHECK-P9-NEXT: addi r7, r1, -32 ; CHECK-P9-NEXT: stxv v2, -32(r1) -; CHECK-P9-NEXT: stdx r3, r6, r4 -; CHECK-P9-NEXT: li r3, 1 -; CHECK-P9-NEXT: rlwinm r4, r7, 3, 28, 28 +; CHECK-P9-NEXT: stdx r6, r7, r4 +; CHECK-P9-NEXT: li r4, 1 ; CHECK-P9-NEXT: lxv vs0, -32(r1) -; CHECK-P9-NEXT: ldx r3, r5, r3 +; CHECK-P9-NEXT: ldx r4, r5, r4 ; CHECK-P9-NEXT: addi r5, r1, -16 ; CHECK-P9-NEXT: stxv vs0, -16(r1) -; CHECK-P9-NEXT: stdx r3, r5, r4 +; CHECK-P9-NEXT: stdx r4, r5, r3 ; CHECK-P9-NEXT: lxv v2, -16(r1) ; CHECK-P9-NEXT: blr ; @@ -842,16 +810,16 @@ ; AIX-P8-64-NEXT: ld r7, 0(r3) ; AIX-P8-64-NEXT: addi r6, r1, -32 ; AIX-P8-64-NEXT: rlwinm r4, r4, 3, 28, 28 -; AIX-P8-64-NEXT: rlwinm r5, r5, 3, 28, 28 ; AIX-P8-64-NEXT: stxvd2x v2, 0, r6 ; AIX-P8-64-NEXT: stdx r7, r6, r4 ; AIX-P8-64-NEXT: li r4, 1 ; AIX-P8-64-NEXT: lxvd2x vs0, 0, r6 ; AIX-P8-64-NEXT: ldx r3, r3, r4 -; AIX-P8-64-NEXT: addi r4, r1, -16 -; AIX-P8-64-NEXT: stxvd2x vs0, 0, r4 -; AIX-P8-64-NEXT: stdx r3, r4, r5 -; AIX-P8-64-NEXT: lxvd2x v2, 0, r4 +; AIX-P8-64-NEXT: rlwinm r4, r5, 3, 28, 28 +; AIX-P8-64-NEXT: addi r5, r1, -16 +; AIX-P8-64-NEXT: stxvd2x vs0, 0, r5 +; AIX-P8-64-NEXT: stdx r3, r5, r4 +; AIX-P8-64-NEXT: lxvd2x v2, 0, r5 ; AIX-P8-64-NEXT: blr ; ; AIX-P8-32-LABEL: testDouble2: @@ -882,13 +850,11 @@ ; CHECK-LABEL: testDouble3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, 65536(r5), 0 -; CHECK-NEXT: extsw r4, r6 -; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-NEXT: rlwinm r4, r6, 3, 0, 
28 ; CHECK-NEXT: vinsdrx v2, r4, r3 ; CHECK-NEXT: li r3, 1 -; CHECK-NEXT: extsw r4, r7 +; CHECK-NEXT: rlwinm r4, r7, 3, 0, 28 ; CHECK-NEXT: rldic r3, r3, 36, 27 -; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28 ; CHECK-NEXT: ldx r3, r5, r3 ; CHECK-NEXT: vinsdrx v2, r4, r3 ; CHECK-NEXT: blr @@ -896,53 +862,51 @@ ; CHECK-BE-LABEL: testDouble3: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: pld r3, 65536(r5), 0 -; CHECK-BE-NEXT: extsw r4, r6 -; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-BE-NEXT: rlwinm r4, r6, 3, 0, 28 ; CHECK-BE-NEXT: vinsdlx v2, r4, r3 ; CHECK-BE-NEXT: li r3, 1 -; CHECK-BE-NEXT: extsw r4, r7 +; CHECK-BE-NEXT: rlwinm r4, r7, 3, 0, 28 ; CHECK-BE-NEXT: rldic r3, r3, 36, 27 -; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28 ; CHECK-BE-NEXT: ldx r3, r5, r3 ; CHECK-BE-NEXT: vinsdlx v2, r4, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testDouble3: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lis r3, 1 ; CHECK-P9-NEXT: rlwinm r4, r6, 3, 28, 28 -; CHECK-P9-NEXT: addi r6, r1, -32 -; CHECK-P9-NEXT: ldx r3, r5, r3 +; CHECK-P9-NEXT: lis r6, 1 +; CHECK-P9-NEXT: rlwinm r3, r7, 3, 28, 28 +; CHECK-P9-NEXT: addi r7, r1, -32 +; CHECK-P9-NEXT: ldx r6, r5, r6 ; CHECK-P9-NEXT: stxv v2, -32(r1) -; CHECK-P9-NEXT: stdx r3, r6, r4 -; CHECK-P9-NEXT: li r3, 1 -; CHECK-P9-NEXT: rlwinm r4, r7, 3, 28, 28 +; CHECK-P9-NEXT: stdx r6, r7, r4 +; CHECK-P9-NEXT: li r4, 1 ; CHECK-P9-NEXT: lxv vs0, -32(r1) -; CHECK-P9-NEXT: rldic r3, r3, 36, 27 -; CHECK-P9-NEXT: ldx r3, r5, r3 +; CHECK-P9-NEXT: rldic r4, r4, 36, 27 +; CHECK-P9-NEXT: ldx r4, r5, r4 ; CHECK-P9-NEXT: addi r5, r1, -16 ; CHECK-P9-NEXT: stxv vs0, -16(r1) -; CHECK-P9-NEXT: stdx r3, r5, r4 +; CHECK-P9-NEXT: stdx r4, r5, r3 ; CHECK-P9-NEXT: lxv v2, -16(r1) ; CHECK-P9-NEXT: blr ; ; AIX-P8-64-LABEL: testDouble3: ; AIX-P8-64: # %bb.0: # %entry ; AIX-P8-64-NEXT: lis r6, 1 -; AIX-P8-64-NEXT: addi r7, r1, -32 ; AIX-P8-64-NEXT: rlwinm r4, r4, 3, 28, 28 +; AIX-P8-64-NEXT: addi r7, r1, -32 ; AIX-P8-64-NEXT: li r8, 1 -; AIX-P8-64-NEXT: rlwinm r5, r5, 3, 
28, 28 ; AIX-P8-64-NEXT: ldx r6, r3, r6 ; AIX-P8-64-NEXT: stxvd2x v2, 0, r7 ; AIX-P8-64-NEXT: stdx r6, r7, r4 ; AIX-P8-64-NEXT: rldic r4, r8, 36, 27 ; AIX-P8-64-NEXT: lxvd2x vs0, 0, r7 ; AIX-P8-64-NEXT: ldx r3, r3, r4 -; AIX-P8-64-NEXT: addi r4, r1, -16 -; AIX-P8-64-NEXT: stxvd2x vs0, 0, r4 -; AIX-P8-64-NEXT: stdx r3, r4, r5 -; AIX-P8-64-NEXT: lxvd2x v2, 0, r4 +; AIX-P8-64-NEXT: rlwinm r4, r5, 3, 28, 28 +; AIX-P8-64-NEXT: addi r5, r1, -16 +; AIX-P8-64-NEXT: stxvd2x vs0, 0, r5 +; AIX-P8-64-NEXT: stdx r3, r5, r4 +; AIX-P8-64-NEXT: lxvd2x v2, 0, r5 ; AIX-P8-64-NEXT: blr ; ; AIX-P8-32-LABEL: testDouble3: diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll @@ -25,7 +25,7 @@ ret half %r } -define half @extractelt_nxv1f16_idx( %v, i32 signext %idx) { +define half @extractelt_nxv1f16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv1f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma @@ -57,7 +57,7 @@ ret half %r } -define half @extractelt_nxv2f16_idx( %v, i32 signext %idx) { +define half @extractelt_nxv2f16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv2f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -89,7 +89,7 @@ ret half %r } -define half @extractelt_nxv4f16_idx( %v, i32 signext %idx) { +define half @extractelt_nxv4f16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv4f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma @@ -121,7 +121,7 @@ ret half %r } -define half @extractelt_nxv8f16_idx( %v, i32 signext %idx) { +define half @extractelt_nxv8f16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv8f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma @@ -153,7 +153,7 @@ ret half %r } -define half @extractelt_nxv16f16_idx( %v, i32 signext %idx) { +define half @extractelt_nxv16f16_idx( 
%v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv16f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, ma @@ -185,7 +185,7 @@ ret half %r } -define half @extractelt_nxv32f16_idx( %v, i32 signext %idx) { +define half @extractelt_nxv32f16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv32f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma @@ -217,7 +217,7 @@ ret float %r } -define float @extractelt_nxv1f32_idx( %v, i32 signext %idx) { +define float @extractelt_nxv1f32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv1f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -249,7 +249,7 @@ ret float %r } -define float @extractelt_nxv2f32_idx( %v, i32 signext %idx) { +define float @extractelt_nxv2f32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv2f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -281,7 +281,7 @@ ret float %r } -define float @extractelt_nxv4f32_idx( %v, i32 signext %idx) { +define float @extractelt_nxv4f32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv4f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -313,7 +313,7 @@ ret float %r } -define float @extractelt_nxv8f32_idx( %v, i32 signext %idx) { +define float @extractelt_nxv8f32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv8f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, ma @@ -345,7 +345,7 @@ ret float %r } -define float @extractelt_nxv16f32_idx( %v, i32 signext %idx) { +define float @extractelt_nxv16f32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv16f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma @@ -377,7 +377,7 @@ ret double %r } -define double @extractelt_nxv1f64_idx( %v, i32 signext %idx) { +define double @extractelt_nxv1f64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv1f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma 
@@ -409,7 +409,7 @@ ret double %r } -define double @extractelt_nxv2f64_idx( %v, i32 signext %idx) { +define double @extractelt_nxv2f64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv2f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma @@ -441,7 +441,7 @@ ret double %r } -define double @extractelt_nxv4f64_idx( %v, i32 signext %idx) { +define double @extractelt_nxv4f64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv4f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma @@ -473,7 +473,7 @@ ret double %r } -define double @extractelt_nxv8f64_idx( %v, i32 signext %idx) { +define double @extractelt_nxv8f64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv8f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma @@ -591,28 +591,6 @@ } define double @extractelt_nxv16f64_neg1( %v) { -; CHECK-LABEL: extractelt_nxv16f64_neg1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -64 -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: addi s0, sp, 64 -; CHECK-NEXT: .cfi_def_cfa s0, 0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: addi a0, sp, 64 -; CHECK-NEXT: vs8r.v v8, (a0) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a2, a1, 3 -; CHECK-NEXT: add a2, a0, a2 -; CHECK-NEXT: vs8r.v v16, (a2) -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: fld fa0, -8(a0) -; CHECK-NEXT: addi sp, s0, -64 -; CHECK-NEXT: addi sp, sp, 64 -; CHECK-NEXT: ret %r = extractelement %v, i32 -1 ret double %r } @@ -628,7 +606,7 @@ ret double %r } -define double @extractelt_nxv16f64_idx( %v, i32 signext %idx) { +define double @extractelt_nxv16f64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv16f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll --- 
a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll @@ -23,7 +23,7 @@ ret i8 %r } -define signext i8 @extractelt_nxv1i8_idx( %v, i32 signext %idx) { +define signext i8 @extractelt_nxv1i8_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv1i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma @@ -55,7 +55,7 @@ ret i8 %r } -define signext i8 @extractelt_nxv2i8_idx( %v, i32 signext %idx) { +define signext i8 @extractelt_nxv2i8_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv2i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -87,7 +87,7 @@ ret i8 %r } -define signext i8 @extractelt_nxv4i8_idx( %v, i32 signext %idx) { +define signext i8 @extractelt_nxv4i8_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv4i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma @@ -119,7 +119,7 @@ ret i8 %r } -define signext i8 @extractelt_nxv8i8_idx( %v, i32 signext %idx) { +define signext i8 @extractelt_nxv8i8_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv8i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma @@ -151,7 +151,7 @@ ret i8 %r } -define signext i8 @extractelt_nxv16i8_idx( %v, i32 signext %idx) { +define signext i8 @extractelt_nxv16i8_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv16i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, m2, ta, ma @@ -183,7 +183,7 @@ ret i8 %r } -define signext i8 @extractelt_nxv32i8_idx( %v, i32 signext %idx) { +define signext i8 @extractelt_nxv32i8_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv32i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, m4, ta, ma @@ -215,7 +215,7 @@ ret i8 %r } -define signext i8 @extractelt_nxv64i8_idx( %v, i32 signext %idx) { +define signext i8 @extractelt_nxv64i8_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv64i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma 
@@ -247,7 +247,7 @@ ret i16 %r } -define signext i16 @extractelt_nxv1i16_idx( %v, i32 signext %idx) { +define signext i16 @extractelt_nxv1i16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv1i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma @@ -279,7 +279,7 @@ ret i16 %r } -define signext i16 @extractelt_nxv2i16_idx( %v, i32 signext %idx) { +define signext i16 @extractelt_nxv2i16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv2i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -311,7 +311,7 @@ ret i16 %r } -define signext i16 @extractelt_nxv4i16_idx( %v, i32 signext %idx) { +define signext i16 @extractelt_nxv4i16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv4i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma @@ -343,7 +343,7 @@ ret i16 %r } -define signext i16 @extractelt_nxv8i16_idx( %v, i32 signext %idx) { +define signext i16 @extractelt_nxv8i16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv8i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma @@ -375,7 +375,7 @@ ret i16 %r } -define signext i16 @extractelt_nxv16i16_idx( %v, i32 signext %idx) { +define signext i16 @extractelt_nxv16i16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv16i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, ma @@ -407,7 +407,7 @@ ret i16 %r } -define signext i16 @extractelt_nxv32i16_idx( %v, i32 signext %idx) { +define signext i16 @extractelt_nxv32i16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv32i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma @@ -439,7 +439,7 @@ ret i32 %r } -define signext i32 @extractelt_nxv1i32_idx( %v, i32 signext %idx) { +define signext i32 @extractelt_nxv1i32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv1i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -471,7 +471,7 @@ ret i32 %r } -define signext i32 
@extractelt_nxv2i32_idx( %v, i32 signext %idx) { +define signext i32 @extractelt_nxv2i32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv2i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -503,7 +503,7 @@ ret i32 %r } -define signext i32 @extractelt_nxv4i32_idx( %v, i32 signext %idx) { +define signext i32 @extractelt_nxv4i32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv4i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -535,7 +535,7 @@ ret i32 %r } -define signext i32 @extractelt_nxv8i32_idx( %v, i32 signext %idx) { +define signext i32 @extractelt_nxv8i32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv8i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, ma @@ -567,7 +567,7 @@ ret i32 %r } -define signext i32 @extractelt_nxv16i32_idx( %v, i32 signext %idx) { +define signext i32 @extractelt_nxv16i32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv16i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma @@ -599,7 +599,7 @@ ret i64 %r } -define i64 @extractelt_nxv1i64_idx( %v, i32 signext %idx) { +define i64 @extractelt_nxv1i64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv1i64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -631,7 +631,7 @@ ret i64 %r } -define i64 @extractelt_nxv2i64_idx( %v, i32 signext %idx) { +define i64 @extractelt_nxv2i64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv2i64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma @@ -663,7 +663,7 @@ ret i64 %r } -define i64 @extractelt_nxv4i64_idx( %v, i32 signext %idx) { +define i64 @extractelt_nxv4i64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv4i64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma @@ -695,7 +695,7 @@ ret i64 %r } -define i64 @extractelt_nxv8i64_idx( %v, i32 signext %idx) { +define i64 @extractelt_nxv8i64_idx( %v, i32 zeroext %idx) { ; 
CHECK-LABEL: extractelt_nxv8i64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma @@ -814,13 +814,21 @@ ; CHECK-NEXT: andi sp, sp, -64 ; CHECK-NEXT: addi a0, sp, 64 ; CHECK-NEXT: vs8r.v v8, (a0) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a2, a1, 3 -; CHECK-NEXT: add a2, a0, a2 -; CHECK-NEXT: vs8r.v v16, (a2) -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: ld a0, -8(a0) +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 3 +; CHECK-NEXT: add a3, a0, a1 +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: vs8r.v v16, (a3) +; CHECK-NEXT: bltu a2, a1, .LBB72_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: .LBB72_2: +; CHECK-NEXT: slli a1, a2, 3 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: ld a0, 0(a0) ; CHECK-NEXT: addi sp, s0, -64 ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret @@ -839,7 +847,7 @@ ret i64 %r } -define i64 @extractelt_nxv16i64_idx( %v, i32 signext %idx) { +define i64 @extractelt_nxv16i64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv16i64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -247,7 +247,7 @@ ret i64 %b } -define i8 @extractelt_v16i8_idx(<16 x i8>* %x, i32 signext %idx) nounwind { +define i8 @extractelt_v16i8_idx(<16 x i8>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v16i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma @@ -261,7 +261,7 @@ ret i8 %b } -define i16 @extractelt_v8i16_idx(<8 x i16>* %x, i32 signext %idx) nounwind { +define i16 @extractelt_v8i16_idx(<8 x i16>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v8i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, 
m1, ta, ma @@ -275,7 +275,7 @@ ret i16 %b } -define i32 @extractelt_v4i32_idx(<4 x i32>* %x, i32 signext %idx) nounwind { +define i32 @extractelt_v4i32_idx(<4 x i32>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v4i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma @@ -291,7 +291,7 @@ ret i32 %c } -define i64 @extractelt_v2i64_idx(<2 x i64>* %x, i32 signext %idx) nounwind { +define i64 @extractelt_v2i64_idx(<2 x i64>* %x, i32 zeroext %idx) nounwind { ; RV32-LABEL: extractelt_v2i64_idx: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -320,7 +320,7 @@ ret i64 %c } -define half @extractelt_v8f16_idx(<8 x half>* %x, i32 signext %idx) nounwind { +define half @extractelt_v8f16_idx(<8 x half>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v8f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma @@ -336,7 +336,7 @@ ret half %c } -define float @extractelt_v4f32_idx(<4 x float>* %x, i32 signext %idx) nounwind { +define float @extractelt_v4f32_idx(<4 x float>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v4f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma @@ -352,7 +352,7 @@ ret float %c } -define double @extractelt_v2f64_idx(<2 x double>* %x, i32 signext %idx) nounwind { +define double @extractelt_v2f64_idx(<2 x double>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v2f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -368,7 +368,7 @@ ret double %c } -define i8 @extractelt_v32i8_idx(<32 x i8>* %x, i32 signext %idx) nounwind { +define i8 @extractelt_v32i8_idx(<32 x i8>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v32i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 32 @@ -383,7 +383,7 @@ ret i8 %b } -define i16 @extractelt_v16i16_idx(<16 x i16>* %x, i32 signext %idx) nounwind { +define i16 @extractelt_v16i16_idx(<16 x i16>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v16i16_idx: ; 
CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma @@ -397,7 +397,7 @@ ret i16 %b } -define i32 @extractelt_v8i32_idx(<8 x i32>* %x, i32 signext %idx) nounwind { +define i32 @extractelt_v8i32_idx(<8 x i32>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v8i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma @@ -413,7 +413,7 @@ ret i32 %c } -define i64 @extractelt_v4i64_idx(<4 x i64>* %x, i32 signext %idx) nounwind { +define i64 @extractelt_v4i64_idx(<4 x i64>* %x, i32 zeroext %idx) nounwind { ; RV32-LABEL: extractelt_v4i64_idx: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma @@ -442,7 +442,7 @@ ret i64 %c } -define half @extractelt_v16f16_idx(<16 x half>* %x, i32 signext %idx) nounwind { +define half @extractelt_v16f16_idx(<16 x half>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v16f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma @@ -458,7 +458,7 @@ ret half %c } -define float @extractelt_v8f32_idx(<8 x float>* %x, i32 signext %idx) nounwind { +define float @extractelt_v8f32_idx(<8 x float>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v8f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma @@ -474,7 +474,7 @@ ret float %c } -define double @extractelt_v4f64_idx(<4 x double>* %x, i32 signext %idx) nounwind { +define double @extractelt_v4f64_idx(<4 x double>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v4f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma @@ -491,10 +491,10 @@ } ; This uses a non-power of 2 type so that it isn't an MVT to catch an -; incorrect use of getSimpleValueType_idx(, i32 signext %idx). +; incorrect use of getSimpleValueType_idx(, i32 zeroext %idx). ; NOTE: Type legalization is bitcasting to vXi32 and doing 2 independent ; slidedowns and extracts. 
-define i64 @extractelt_v3i64_idx(<3 x i64>* %x, i32 signext %idx) nounwind { +define i64 @extractelt_v3i64_idx(<3 x i64>* %x, i32 zeroext %idx) nounwind { ; RV32-LABEL: extractelt_v3i64_idx: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll @@ -17,34 +17,19 @@ } define <1 x i1> @insertelt_idx_v1i1(<1 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind { -; RV32-LABEL: insertelt_idx_v1i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: addi a0, a1, 1 -; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, ma -; RV32-NEXT: vslideup.vx v9, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vand.vi v8, v9, 1 -; RV32-NEXT: vmsne.vi v0, v8, 0 -; RV32-NEXT: ret -; -; RV64-LABEL: insertelt_idx_v1i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vmv.v.i v9, 0 -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: sext.w a0, a1 -; RV64-NEXT: addi a1, a0, 1 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; RV64-NEXT: vslideup.vx v9, v8, a0 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vand.vi v8, v9, 1 -; RV64-NEXT: vmsne.vi v0, v8, 0 -; RV64-NEXT: ret +; CHECK-LABEL: insertelt_idx_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret %y = 
insertelement <1 x i1> %x, i1 %elt, i32 %idx ret <1 x i1> %y } @@ -67,34 +52,19 @@ } define <2 x i1> @insertelt_idx_v2i1(<2 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind { -; RV32-LABEL: insertelt_idx_v2i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: addi a0, a1, 1 -; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, ma -; RV32-NEXT: vslideup.vx v9, v8, a1 -; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; RV32-NEXT: vand.vi v8, v9, 1 -; RV32-NEXT: vmsne.vi v0, v8, 0 -; RV32-NEXT: ret -; -; RV64-LABEL: insertelt_idx_v2i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vmv.v.i v9, 0 -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: sext.w a0, a1 -; RV64-NEXT: addi a1, a0, 1 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; RV64-NEXT: vslideup.vx v9, v8, a0 -; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; RV64-NEXT: vand.vi v8, v9, 1 -; RV64-NEXT: vmsne.vi v0, v8, 0 -; RV64-NEXT: ret +; CHECK-LABEL: insertelt_idx_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret %y = insertelement <2 x i1> %x, i1 %elt, i32 %idx ret <2 x i1> %y } @@ -117,34 +87,19 @@ } define <8 x i1> @insertelt_idx_v8i1(<8 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind { -; RV32-LABEL: insertelt_idx_v8i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: addi a0, a1, 1 -; RV32-NEXT: vsetvli zero, a0, e8, mf2, tu, ma -; 
RV32-NEXT: vslideup.vx v9, v8, a1 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vand.vi v8, v9, 1 -; RV32-NEXT: vmsne.vi v0, v8, 0 -; RV32-NEXT: ret -; -; RV64-LABEL: insertelt_idx_v8i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vmv.v.i v9, 0 -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: sext.w a0, a1 -; RV64-NEXT: addi a1, a0, 1 -; RV64-NEXT: vsetvli zero, a1, e8, mf2, tu, ma -; RV64-NEXT: vslideup.vx v9, v8, a0 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vand.vi v8, v9, 1 -; RV64-NEXT: vmsne.vi v0, v8, 0 -; RV64-NEXT: ret +; CHECK-LABEL: insertelt_idx_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret %y = insertelement <8 x i1> %x, i1 %elt, i32 %idx ret <8 x i1> %y } @@ -168,36 +123,23 @@ } define <64 x i1> @insertelt_idx_v64i1(<64 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind { -; RV32-LABEL: insertelt_idx_v64i1: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 64 -; RV32-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmerge.vim v12, v12, 1, v0 -; RV32-NEXT: addi a0, a1, 1 -; RV32-NEXT: vsetvli zero, a0, e8, m4, tu, ma -; RV32-NEXT: vslideup.vx v12, v8, a1 -; RV32-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; RV32-NEXT: vand.vi v8, v12, 1 -; RV32-NEXT: vmsne.vi v0, v8, 0 -; RV32-NEXT: ret -; -; RV64-LABEL: insertelt_idx_v64i1: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 64 -; RV64-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vmv.v.i v12, 0 -; RV64-NEXT: vmerge.vim v12, v12, 1, v0 -; RV64-NEXT: sext.w a0, a1 
-; RV64-NEXT: addi a1, a0, 1 -; RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma -; RV64-NEXT: vslideup.vx v12, v8, a0 -; RV64-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; RV64-NEXT: vand.vi v8, v12, 1 -; RV64-NEXT: vmsne.vi v0, v8, 0 -; RV64-NEXT: ret +; CHECK-LABEL: insertelt_idx_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma +; CHECK-NEXT: vslideup.vx v12, v8, a1 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vand.vi v8, v12, 1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret %y = insertelement <64 x i1> %x, i1 %elt, i32 %idx ret <64 x i1> %y } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -110,7 +110,8 @@ ; RV64-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: vmv.s.x v12, a1 -; RV64-NEXT: sext.w a1, a2 +; RV64-NEXT: slli a1, a2, 32 +; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: addi a2, a1, 1 ; RV64-NEXT: vsetvli zero, a2, e16, m4, tu, ma ; RV64-NEXT: vslideup.vx v8, v12, a1 @@ -141,7 +142,8 @@ ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: vfmv.s.f v10, fa0 -; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: addi a2, a1, 1 ; RV64-NEXT: vsetvli zero, a2, e32, m2, tu, ma ; RV64-NEXT: vslideup.vx v8, v10, a1 @@ -190,7 +192,8 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a2, -1 ; RV64-NEXT: vmv.s.x v12, a2 -; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: srli 
a1, a1, 32 ; RV64-NEXT: addi a2, a1, 1 ; RV64-NEXT: vsetvli zero, a2, e64, m4, tu, ma ; RV64-NEXT: vslideup.vx v8, v12, a1 @@ -239,7 +242,8 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a2, 6 ; RV64-NEXT: vmv.s.x v12, a2 -; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: addi a2, a1, 1 ; RV64-NEXT: vsetvli zero, a2, e64, m4, tu, ma ; RV64-NEXT: vslideup.vx v8, v12, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll @@ -26,7 +26,7 @@ ret %r } -define @insertelt_nxv1f16_idx( %v, half %elt, i32 signext %idx) { +define @insertelt_nxv1f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma @@ -61,7 +61,7 @@ ret %r } -define @insertelt_nxv2f16_idx( %v, half %elt, i32 signext %idx) { +define @insertelt_nxv2f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma @@ -96,7 +96,7 @@ ret %r } -define @insertelt_nxv4f16_idx( %v, half %elt, i32 signext %idx) { +define @insertelt_nxv4f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma @@ -131,7 +131,7 @@ ret %r } -define @insertelt_nxv8f16_idx( %v, half %elt, i32 signext %idx) { +define @insertelt_nxv8f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma @@ -166,7 +166,7 @@ ret %r } -define @insertelt_nxv16f16_idx( %v, half %elt, i32 signext %idx) { +define @insertelt_nxv16f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv16f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma @@ -201,7 +201,7 @@ ret %r } -define 
@insertelt_nxv32f16_idx( %v, half %elt, i32 signext %idx) { +define @insertelt_nxv32f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv32f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma @@ -236,7 +236,7 @@ ret %r } -define @insertelt_nxv1f32_idx( %v, float %elt, i32 signext %idx) { +define @insertelt_nxv1f32_idx( %v, float %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma @@ -271,7 +271,7 @@ ret %r } -define @insertelt_nxv2f32_idx( %v, float %elt, i32 signext %idx) { +define @insertelt_nxv2f32_idx( %v, float %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma @@ -306,7 +306,7 @@ ret %r } -define @insertelt_nxv4f32_idx( %v, float %elt, i32 signext %idx) { +define @insertelt_nxv4f32_idx( %v, float %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma @@ -341,7 +341,7 @@ ret %r } -define @insertelt_nxv8f32_idx( %v, float %elt, i32 signext %idx) { +define @insertelt_nxv8f32_idx( %v, float %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma @@ -376,7 +376,7 @@ ret %r } -define @insertelt_nxv16f32_idx( %v, float %elt, i32 signext %idx) { +define @insertelt_nxv16f32_idx( %v, float %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv16f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma @@ -411,7 +411,7 @@ ret %r } -define @insertelt_nxv1f64_idx( %v, double %elt, i32 signext %idx) { +define @insertelt_nxv1f64_idx( %v, double %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma @@ -446,7 +446,7 @@ ret %r } -define @insertelt_nxv2f64_idx( %v, double %elt, i32 signext %idx) { +define @insertelt_nxv2f64_idx( %v, double %elt, 
i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma @@ -481,7 +481,7 @@ ret %r } -define @insertelt_nxv4f64_idx( %v, double %elt, i32 signext %idx) { +define @insertelt_nxv4f64_idx( %v, double %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma @@ -516,7 +516,7 @@ ret %r } -define @insertelt_nxv8f64_idx( %v, double %elt, i32 signext %idx) { +define @insertelt_nxv8f64_idx( %v, double %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll @@ -24,7 +24,7 @@ ret %r } -define @insertelt_nxv1i8_idx( %v, i8 signext %elt, i32 signext %idx) { +define @insertelt_nxv1i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma @@ -59,7 +59,7 @@ ret %r } -define @insertelt_nxv2i8_idx( %v, i8 signext %elt, i32 signext %idx) { +define @insertelt_nxv2i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma @@ -94,7 +94,7 @@ ret %r } -define @insertelt_nxv4i8_idx( %v, i8 signext %elt, i32 signext %idx) { +define @insertelt_nxv4i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma @@ -129,7 +129,7 @@ ret %r } -define @insertelt_nxv8i8_idx( %v, i8 signext %elt, i32 signext %idx) { +define @insertelt_nxv8i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma @@ -164,7 
+164,7 @@ ret %r } -define @insertelt_nxv16i8_idx( %v, i8 signext %elt, i32 signext %idx) { +define @insertelt_nxv16i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv16i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma @@ -199,7 +199,7 @@ ret %r } -define @insertelt_nxv32i8_idx( %v, i8 signext %elt, i32 signext %idx) { +define @insertelt_nxv32i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv32i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma @@ -234,7 +234,7 @@ ret %r } -define @insertelt_nxv64i8_idx( %v, i8 signext %elt, i32 signext %idx) { +define @insertelt_nxv64i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv64i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma @@ -269,7 +269,7 @@ ret %r } -define @insertelt_nxv1i16_idx( %v, i16 signext %elt, i32 signext %idx) { +define @insertelt_nxv1i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma @@ -304,7 +304,7 @@ ret %r } -define @insertelt_nxv2i16_idx( %v, i16 signext %elt, i32 signext %idx) { +define @insertelt_nxv2i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma @@ -339,7 +339,7 @@ ret %r } -define @insertelt_nxv4i16_idx( %v, i16 signext %elt, i32 signext %idx) { +define @insertelt_nxv4i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma @@ -374,7 +374,7 @@ ret %r } -define @insertelt_nxv8i16_idx( %v, i16 signext %elt, i32 signext %idx) { +define @insertelt_nxv8i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma @@ -409,7 +409,7 @@ ret %r } -define 
@insertelt_nxv16i16_idx( %v, i16 signext %elt, i32 signext %idx) { +define @insertelt_nxv16i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv16i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -444,7 +444,7 @@ ret %r } -define @insertelt_nxv32i16_idx( %v, i16 signext %elt, i32 signext %idx) { +define @insertelt_nxv32i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv32i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma @@ -479,7 +479,7 @@ ret %r } -define @insertelt_nxv1i32_idx( %v, i32 signext %elt, i32 signext %idx) { +define @insertelt_nxv1i32_idx( %v, i32 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma @@ -514,7 +514,7 @@ ret %r } -define @insertelt_nxv2i32_idx( %v, i32 signext %elt, i32 signext %idx) { +define @insertelt_nxv2i32_idx( %v, i32 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma @@ -549,7 +549,7 @@ ret %r } -define @insertelt_nxv4i32_idx( %v, i32 signext %elt, i32 signext %idx) { +define @insertelt_nxv4i32_idx( %v, i32 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma @@ -584,7 +584,7 @@ ret %r } -define @insertelt_nxv8i32_idx( %v, i32 signext %elt, i32 signext %idx) { +define @insertelt_nxv8i32_idx( %v, i32 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma @@ -619,7 +619,7 @@ ret %r } -define @insertelt_nxv16i32_idx( %v, i32 signext %elt, i32 signext %idx) { +define @insertelt_nxv16i32_idx( %v, i32 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv16i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -659,7 +659,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli 
a2, zero, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: sext.w a0, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 @@ -695,7 +696,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: sext.w a0, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v10, a0 @@ -731,7 +733,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: sext.w a0, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, ma ; CHECK-NEXT: vslideup.vx v8, v12, a0 @@ -767,7 +770,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: sext.w a0, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma ; CHECK-NEXT: vslideup.vx v8, v16, a0 diff --git a/llvm/test/CodeGen/VE/Vector/extract_elt.ll b/llvm/test/CodeGen/VE/Vector/extract_elt.ll --- a/llvm/test/CodeGen/VE/Vector/extract_elt.ll +++ b/llvm/test/CodeGen/VE/Vector/extract_elt.ll @@ -6,6 +6,7 @@ define fastcc i64 @extract_rr_v256i64(i32 signext %idx, <256 x i64> %v) { ; CHECK-LABEL: extract_rr_v256i64: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lvs %s0, %v0(%s0) ; CHECK-NEXT: b.l.t (, %s10) %ret = extractelement <256 x i64> %v, i32 %idx @@ -45,6 +46,7 @@ define fastcc i32 @extract_rr_v256i32(i32 signext %idx, <256 x i32> %v) { ; CHECK-LABEL: extract_rr_v256i32: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lvs %s0, %v0(%s0) ; CHECK-NEXT: b.l.t (, %s10) %ret = extractelement <256 x i32> %v, i32 %idx 
@@ -84,7 +86,10 @@ define fastcc i32 @extract_rr_v512i32(<512 x i32> %v, i32 signext %idx) { ; CHECK-LABEL: extract_rr_v512i32: ; CHECK: # %bb.0: -; CHECK-NEXT: srl %s1, %s0, 1 +; CHECK-NEXT: lea %s1, -2 +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: and %s1, %s0, %s1 +; CHECK-NEXT: srl %s1, %s1, 1 ; CHECK-NEXT: lvs %s1, %v0(%s1) ; CHECK-NEXT: nnd %s0, %s0, (63)0 ; CHECK-NEXT: sla.w.sx %s0, %s0, 5 @@ -100,6 +105,7 @@ define fastcc double @extract_rr_v256f64(i32 signext %idx, <256 x double> %v) { ; CHECK-LABEL: extract_rr_v256f64: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lvs %s0, %v0(%s0) ; CHECK-NEXT: b.l.t (, %s10) %ret = extractelement <256 x double> %v, i32 %idx @@ -139,6 +145,7 @@ define fastcc float @extract_rr_v256f32(i32 signext %idx, <256 x float> %v) { ; CHECK-LABEL: extract_rr_v256f32: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lvs %s0, %v0(%s0) ; CHECK-NEXT: b.l.t (, %s10) %ret = extractelement <256 x float> %v, i32 %idx @@ -179,7 +186,10 @@ define fastcc float @extract_rr_v512f32(<512 x float> %v, i32 signext %idx) { ; CHECK-LABEL: extract_rr_v512f32: ; CHECK: # %bb.0: -; CHECK-NEXT: srl %s1, %s0, 1 +; CHECK-NEXT: lea %s1, -2 +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: and %s1, %s0, %s1 +; CHECK-NEXT: srl %s1, %s1, 1 ; CHECK-NEXT: lvs %s1, %v0(%s1) ; CHECK-NEXT: nnd %s0, %s0, (63)0 ; CHECK-NEXT: sla.w.sx %s0, %s0, 5 diff --git a/llvm/test/CodeGen/VE/Vector/insert_elt.ll b/llvm/test/CodeGen/VE/Vector/insert_elt.ll --- a/llvm/test/CodeGen/VE/Vector/insert_elt.ll +++ b/llvm/test/CodeGen/VE/Vector/insert_elt.ll @@ -6,6 +6,7 @@ define fastcc <256 x i64> @insert_rr_v256i64(i32 signext %idx, i64 %s) { ; CHECK-LABEL: insert_rr_v256i64: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lsv %v0(%s0), %s1 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x i64> undef, i64 %s, i32 %idx @@ -46,6 +47,7 @@ ; CHECK-LABEL: insert_rr_v256i32: ; CHECK: # %bb.0: ; CHECK-NEXT: and %s1, %s1, 
(32)0 +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lsv %v0(%s0), %s1 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x i32> undef, i32 %s, i32 %idx @@ -94,6 +96,9 @@ ; CHECK-NEXT: nnd %s2, %s0, (63)0 ; CHECK-NEXT: sla.w.sx %s2, %s2, 5 ; CHECK-NEXT: sll %s1, %s1, %s2 +; CHECK-NEXT: lea %s3, -2 +; CHECK-NEXT: and %s3, %s3, (32)0 +; CHECK-NEXT: and %s0, %s0, %s3 ; CHECK-NEXT: srl %s0, %s0, 1 ; CHECK-NEXT: lvs %s3, %v0(%s0) ; CHECK-NEXT: srl %s2, (32)1, %s2 @@ -110,6 +115,7 @@ define fastcc <256 x double> @insert_rr_v256f64(i32 signext %idx, double %s) { ; CHECK-LABEL: insert_rr_v256f64: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lsv %v0(%s0), %s1 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x double> undef, double %s, i32 %idx @@ -149,6 +155,7 @@ define fastcc <256 x float> @insert_rr_v256f32(i32 signext %idx, float %s) { ; CHECK-LABEL: insert_rr_v256f32: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lsv %v0(%s0), %s1 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x float> undef, float %s, i32 %idx @@ -193,7 +200,10 @@ ; CHECK-LABEL: insert_rr_v512f32: ; CHECK: # %bb.0: ; CHECK-NEXT: sra.l %s1, %s1, 32 -; CHECK-NEXT: srl %s2, %s0, 1 +; CHECK-NEXT: lea %s2, -2 +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: and %s2, %s0, %s2 +; CHECK-NEXT: srl %s2, %s2, 1 ; CHECK-NEXT: lvs %s3, %v0(%s2) ; CHECK-NEXT: nnd %s0, %s0, (63)0 ; CHECK-NEXT: sla.w.sx %s0, %s0, 5 diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll --- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll @@ -97,8 +97,17 @@ ; CHECK-LABEL: swizzle_one_i8x16: ; CHECK: .functype swizzle_one_i8x16 (v128, v128) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1 -; CHECK-NEXT: return $pop0 +; CHECK-NEXT: global.get $push5=, __stack_pointer +; CHECK-NEXT: i32.const $push6=, 16 +; CHECK-NEXT: i32.sub 
$push8=, $pop5, $pop6 +; CHECK-NEXT: local.tee $push7=, $2=, $pop8 +; CHECK-NEXT: v128.store 0($pop7), $0 +; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 0 +; CHECK-NEXT: i32.const $push1=, 15 +; CHECK-NEXT: i32.and $push2=, $pop0, $pop1 +; CHECK-NEXT: i32.or $push3=, $2, $pop2 +; CHECK-NEXT: v128.load8_splat $push4=, 0($pop3) +; CHECK-NEXT: return $pop4 %m0 = extractelement <16 x i8> %mask, i32 0 %s0 = extractelement <16 x i8> %src, i8 %m0 %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0 @@ -109,8 +118,107 @@ ; CHECK-LABEL: swizzle_all_i8x16: ; CHECK: .functype swizzle_all_i8x16 (v128, v128) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1 -; CHECK-NEXT: return $pop0 +; CHECK-NEXT: global.get $push80=, __stack_pointer +; CHECK-NEXT: i32.const $push81=, 16 +; CHECK-NEXT: i32.sub $push98=, $pop80, $pop81 +; CHECK-NEXT: local.tee $push97=, $2=, $pop98 +; CHECK-NEXT: v128.store 0($pop97), $0 +; CHECK-NEXT: i8x16.extract_lane_u $push61=, $1, 0 +; CHECK-NEXT: i32.const $push1=, 15 +; CHECK-NEXT: i32.and $push62=, $pop61, $pop1 +; CHECK-NEXT: i32.or $push63=, $2, $pop62 +; CHECK-NEXT: v128.load8_splat $push64=, 0($pop63) +; CHECK-NEXT: i8x16.extract_lane_u $push57=, $1, 1 +; CHECK-NEXT: i32.const $push96=, 15 +; CHECK-NEXT: i32.and $push58=, $pop57, $pop96 +; CHECK-NEXT: i32.or $push59=, $2, $pop58 +; CHECK-NEXT: i32.load8_u $push60=, 0($pop59) +; CHECK-NEXT: i8x16.replace_lane $push65=, $pop64, 1, $pop60 +; CHECK-NEXT: i8x16.extract_lane_u $push53=, $1, 2 +; CHECK-NEXT: i32.const $push95=, 15 +; CHECK-NEXT: i32.and $push54=, $pop53, $pop95 +; CHECK-NEXT: i32.or $push55=, $2, $pop54 +; CHECK-NEXT: i32.load8_u $push56=, 0($pop55) +; CHECK-NEXT: i8x16.replace_lane $push66=, $pop65, 2, $pop56 +; CHECK-NEXT: i8x16.extract_lane_u $push49=, $1, 3 +; CHECK-NEXT: i32.const $push94=, 15 +; CHECK-NEXT: i32.and $push50=, $pop49, $pop94 +; CHECK-NEXT: i32.or $push51=, $2, $pop50 +; CHECK-NEXT: i32.load8_u $push52=, 0($pop51) +; CHECK-NEXT: 
i8x16.replace_lane $push67=, $pop66, 3, $pop52 +; CHECK-NEXT: i8x16.extract_lane_u $push45=, $1, 4 +; CHECK-NEXT: i32.const $push93=, 15 +; CHECK-NEXT: i32.and $push46=, $pop45, $pop93 +; CHECK-NEXT: i32.or $push47=, $2, $pop46 +; CHECK-NEXT: i32.load8_u $push48=, 0($pop47) +; CHECK-NEXT: i8x16.replace_lane $push68=, $pop67, 4, $pop48 +; CHECK-NEXT: i8x16.extract_lane_u $push41=, $1, 5 +; CHECK-NEXT: i32.const $push92=, 15 +; CHECK-NEXT: i32.and $push42=, $pop41, $pop92 +; CHECK-NEXT: i32.or $push43=, $2, $pop42 +; CHECK-NEXT: i32.load8_u $push44=, 0($pop43) +; CHECK-NEXT: i8x16.replace_lane $push69=, $pop68, 5, $pop44 +; CHECK-NEXT: i8x16.extract_lane_u $push37=, $1, 6 +; CHECK-NEXT: i32.const $push91=, 15 +; CHECK-NEXT: i32.and $push38=, $pop37, $pop91 +; CHECK-NEXT: i32.or $push39=, $2, $pop38 +; CHECK-NEXT: i32.load8_u $push40=, 0($pop39) +; CHECK-NEXT: i8x16.replace_lane $push70=, $pop69, 6, $pop40 +; CHECK-NEXT: i8x16.extract_lane_u $push33=, $1, 7 +; CHECK-NEXT: i32.const $push90=, 15 +; CHECK-NEXT: i32.and $push34=, $pop33, $pop90 +; CHECK-NEXT: i32.or $push35=, $2, $pop34 +; CHECK-NEXT: i32.load8_u $push36=, 0($pop35) +; CHECK-NEXT: i8x16.replace_lane $push71=, $pop70, 7, $pop36 +; CHECK-NEXT: i8x16.extract_lane_u $push29=, $1, 8 +; CHECK-NEXT: i32.const $push89=, 15 +; CHECK-NEXT: i32.and $push30=, $pop29, $pop89 +; CHECK-NEXT: i32.or $push31=, $2, $pop30 +; CHECK-NEXT: i32.load8_u $push32=, 0($pop31) +; CHECK-NEXT: i8x16.replace_lane $push72=, $pop71, 8, $pop32 +; CHECK-NEXT: i8x16.extract_lane_u $push25=, $1, 9 +; CHECK-NEXT: i32.const $push88=, 15 +; CHECK-NEXT: i32.and $push26=, $pop25, $pop88 +; CHECK-NEXT: i32.or $push27=, $2, $pop26 +; CHECK-NEXT: i32.load8_u $push28=, 0($pop27) +; CHECK-NEXT: i8x16.replace_lane $push73=, $pop72, 9, $pop28 +; CHECK-NEXT: i8x16.extract_lane_u $push21=, $1, 10 +; CHECK-NEXT: i32.const $push87=, 15 +; CHECK-NEXT: i32.and $push22=, $pop21, $pop87 +; CHECK-NEXT: i32.or $push23=, $2, $pop22 +; CHECK-NEXT: i32.load8_u 
$push24=, 0($pop23) +; CHECK-NEXT: i8x16.replace_lane $push74=, $pop73, 10, $pop24 +; CHECK-NEXT: i8x16.extract_lane_u $push17=, $1, 11 +; CHECK-NEXT: i32.const $push86=, 15 +; CHECK-NEXT: i32.and $push18=, $pop17, $pop86 +; CHECK-NEXT: i32.or $push19=, $2, $pop18 +; CHECK-NEXT: i32.load8_u $push20=, 0($pop19) +; CHECK-NEXT: i8x16.replace_lane $push75=, $pop74, 11, $pop20 +; CHECK-NEXT: i8x16.extract_lane_u $push13=, $1, 12 +; CHECK-NEXT: i32.const $push85=, 15 +; CHECK-NEXT: i32.and $push14=, $pop13, $pop85 +; CHECK-NEXT: i32.or $push15=, $2, $pop14 +; CHECK-NEXT: i32.load8_u $push16=, 0($pop15) +; CHECK-NEXT: i8x16.replace_lane $push76=, $pop75, 12, $pop16 +; CHECK-NEXT: i8x16.extract_lane_u $push9=, $1, 13 +; CHECK-NEXT: i32.const $push84=, 15 +; CHECK-NEXT: i32.and $push10=, $pop9, $pop84 +; CHECK-NEXT: i32.or $push11=, $2, $pop10 +; CHECK-NEXT: i32.load8_u $push12=, 0($pop11) +; CHECK-NEXT: i8x16.replace_lane $push77=, $pop76, 13, $pop12 +; CHECK-NEXT: i8x16.extract_lane_u $push5=, $1, 14 +; CHECK-NEXT: i32.const $push83=, 15 +; CHECK-NEXT: i32.and $push6=, $pop5, $pop83 +; CHECK-NEXT: i32.or $push7=, $2, $pop6 +; CHECK-NEXT: i32.load8_u $push8=, 0($pop7) +; CHECK-NEXT: i8x16.replace_lane $push78=, $pop77, 14, $pop8 +; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 15 +; CHECK-NEXT: i32.const $push82=, 15 +; CHECK-NEXT: i32.and $push2=, $pop0, $pop82 +; CHECK-NEXT: i32.or $push3=, $2, $pop2 +; CHECK-NEXT: i32.load8_u $push4=, 0($pop3) +; CHECK-NEXT: i8x16.replace_lane $push79=, $pop78, 15, $pop4 +; CHECK-NEXT: return $pop79 %m0 = extractelement <16 x i8> %mask, i32 0 %s0 = extractelement <16 x i8> %src, i8 %m0 %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0 @@ -210,14 +318,25 @@ ; CHECK-LABEL: mashup_swizzle_i8x16: ; CHECK: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1 -; CHECK-NEXT: i8x16.replace_lane $push1=, $pop0, 3, $2 -; CHECK-NEXT: i32.const $push2=, 42 -; CHECK-NEXT: 
i8x16.replace_lane $push3=, $pop1, 4, $pop2 -; CHECK-NEXT: i8x16.replace_lane $push4=, $pop3, 12, $2 -; CHECK-NEXT: i32.const $push6=, 42 -; CHECK-NEXT: i8x16.replace_lane $push5=, $pop4, 14, $pop6 -; CHECK-NEXT: return $pop5 +; CHECK-NEXT: global.get $push12=, __stack_pointer +; CHECK-NEXT: i32.const $push13=, 16 +; CHECK-NEXT: i32.sub $push16=, $pop12, $pop13 +; CHECK-NEXT: local.tee $push15=, $3=, $pop16 +; CHECK-NEXT: v128.store 0($pop15), $0 +; CHECK-NEXT: i8x16.extract_lane_u $push7=, $1, 7 +; CHECK-NEXT: i32.const $push1=, 15 +; CHECK-NEXT: i32.and $push8=, $pop7, $pop1 +; CHECK-NEXT: i32.or $push9=, $3, $pop8 +; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 0 +; CHECK-NEXT: i32.const $push14=, 15 +; CHECK-NEXT: i32.and $push2=, $pop0, $pop14 +; CHECK-NEXT: i32.or $push3=, $3, $pop2 +; CHECK-NEXT: v128.const $push4=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0 +; CHECK-NEXT: v128.load8_lane $push5=, 0($pop3), $pop4, 0 +; CHECK-NEXT: i8x16.replace_lane $push6=, $pop5, 3, $2 +; CHECK-NEXT: v128.load8_lane $push10=, 0($pop9), $pop6, 7 +; CHECK-NEXT: i8x16.replace_lane $push11=, $pop10, 12, $2 +; CHECK-NEXT: return $pop11 %m0 = extractelement <16 x i8> %mask, i32 0 %s0 = extractelement <16 x i8> %src, i8 %m0 %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0 diff --git a/llvm/test/CodeGen/X86/extract-insert.ll b/llvm/test/CodeGen/X86/extract-insert.ll --- a/llvm/test/CodeGen/X86/extract-insert.ll +++ b/llvm/test/CodeGen/X86/extract-insert.ll @@ -5,6 +5,7 @@ define i32 @extractelt_undef_insertelt(i32 %x, i32 %y) { ; CHECK-LABEL: extractelt_undef_insertelt: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ret{{[l|q]}} %b = insertelement <4 x i32> zeroinitializer, i32 %x, i64 3 %c = icmp uge i32 %y, %y diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll --- a/llvm/test/CodeGen/X86/insertelement-var-index.ll +++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll @@ -996,7 +996,7 @@ ; ; 
AVX512-LABEL: arg_i64_v2i64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %xmm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 ; AVX512-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} @@ -1101,7 +1101,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: movapd %xmm0, %xmm2 ; SSE41-NEXT: movddup {{.*#+}} xmm1 = xmm1[0,0] -; SSE41-NEXT: movslq %edi, %rax +; SSE41-NEXT: movl %edi, %eax ; SSE41-NEXT: movq %rax, %xmm0 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -1112,7 +1112,7 @@ ; AVX1-LABEL: arg_f64_v2f64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] -; AVX1-NEXT: movslq %edi, %rax +; AVX1-NEXT: movl %edi, %eax ; AVX1-NEXT: vmovq %rax, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 @@ -1122,7 +1122,7 @@ ; AVX2-LABEL: arg_f64_v2f64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] -; AVX2-NEXT: movslq %edi, %rax +; AVX2-NEXT: movl %edi, %eax ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 @@ -1131,7 +1131,7 @@ ; ; AVX512-LABEL: arg_f64_v2f64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %edi, %rax +; AVX512-NEXT: movl %edi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %xmm2 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1 ; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] @@ -1346,7 +1346,7 @@ ; ; AVX512-LABEL: load_i64_v2i64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %xmm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 ; AVX512-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1} @@ -1458,7 +1458,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: movapd %xmm0, %xmm1 ; SSE41-NEXT: movddup {{.*#+}} xmm2 = mem[0,0] -; SSE41-NEXT: movslq %esi, %rax +; 
SSE41-NEXT: movl %esi, %eax ; SSE41-NEXT: movq %rax, %xmm0 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -1469,7 +1469,7 @@ ; AVX1-LABEL: load_f64_v2f64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; AVX1-NEXT: movslq %esi, %rax +; AVX1-NEXT: movl %esi, %eax ; AVX1-NEXT: vmovq %rax, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 @@ -1479,7 +1479,7 @@ ; AVX2-LABEL: load_f64_v2f64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; AVX2-NEXT: movslq %esi, %rax +; AVX2-NEXT: movl %esi, %eax ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 @@ -1488,7 +1488,7 @@ ; ; AVX512-LABEL: load_f64_v2f64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %xmm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 ; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0] @@ -1733,7 +1733,7 @@ ; ; AVX512-LABEL: arg_i64_v4i64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %ymm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 ; AVX512-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} @@ -1834,7 +1834,7 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 -; AVX1-NEXT: movslq %edi, %rax +; AVX1-NEXT: movl %edi, %eax ; AVX1-NEXT: vmovq %rax, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 @@ -1846,7 +1846,7 @@ ; AVX2-LABEL: arg_f64_v4f64: ; AVX2: # %bb.0: ; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1 -; AVX2-NEXT: movslq %edi, %rax +; AVX2-NEXT: movl %edi, %eax ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2 ; 
AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 @@ -1855,7 +1855,7 @@ ; ; AVX512-LABEL: arg_f64_v4f64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %edi, %rax +; AVX512-NEXT: movl %edi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %ymm2 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %k1 ; AVX512-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1} @@ -2114,7 +2114,7 @@ ; ; AVX512-LABEL: load_i64_v4i64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %ymm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 ; AVX512-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1} @@ -2218,7 +2218,7 @@ ; ; AVX1-LABEL: load_f64_v4f64: ; AVX1: # %bb.0: -; AVX1-NEXT: movslq %esi, %rax +; AVX1-NEXT: movl %esi, %eax ; AVX1-NEXT: vmovq %rax, %xmm1 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 @@ -2231,7 +2231,7 @@ ; AVX2-LABEL: load_f64_v4f64: ; AVX2: # %bb.0: ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1 -; AVX2-NEXT: movslq %esi, %rax +; AVX2-NEXT: movl %esi, %eax ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 @@ -2240,7 +2240,7 @@ ; ; AVX512-LABEL: load_f64_v4f64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %ymm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 ; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} @@ -2273,6 +2273,15 @@ ; SSE-LABEL: PR44139: ; SSE: # %bb.0: ; SSE-NEXT: movl (%rdi), %eax +; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1] +; SSE-NEXT: movdqa %xmm0, 96(%rdi) +; SSE-NEXT: movdqa %xmm0, 112(%rdi) +; SSE-NEXT: movdqa %xmm0, 64(%rdi) +; SSE-NEXT: movdqa %xmm0, 80(%rdi) +; SSE-NEXT: movdqa %xmm0, 32(%rdi) +; SSE-NEXT: movdqa %xmm0, 48(%rdi) +; SSE-NEXT: movdqa %xmm0, (%rdi) +; SSE-NEXT: movdqa %xmm0, 16(%rdi) ; SSE-NEXT: leal 
2147483647(%rax), %ecx ; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: cmovnsl %eax, %ecx @@ -2283,23 +2292,51 @@ ; SSE-NEXT: divl %ecx ; SSE-NEXT: retq ; -; AVX-LABEL: PR44139: -; AVX: # %bb.0: -; AVX-NEXT: movl (%rdi), %eax -; AVX-NEXT: leal 2147483647(%rax), %ecx -; AVX-NEXT: testl %eax, %eax -; AVX-NEXT: cmovnsl %eax, %ecx -; AVX-NEXT: andl $-2147483648, %ecx # imm = 0x80000000 -; AVX-NEXT: addl %eax, %ecx -; AVX-NEXT: # kill: def $eax killed $eax killed $rax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: divl %ecx -; AVX-NEXT: retq +; AVX1OR2-LABEL: PR44139: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX1OR2-NEXT: movl (%rdi), %eax +; AVX1OR2-NEXT: vmovaps %ymm0, 64(%rdi) +; AVX1OR2-NEXT: vmovaps %ymm0, 96(%rdi) +; AVX1OR2-NEXT: vmovaps %ymm0, (%rdi) +; AVX1OR2-NEXT: vmovaps %ymm0, 32(%rdi) +; AVX1OR2-NEXT: leal 2147483647(%rax), %ecx +; AVX1OR2-NEXT: testl %eax, %eax +; AVX1OR2-NEXT: cmovnsl %eax, %ecx +; AVX1OR2-NEXT: andl $-2147483648, %ecx # imm = 0x80000000 +; AVX1OR2-NEXT: addl %eax, %ecx +; AVX1OR2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX1OR2-NEXT: xorl %edx, %edx +; AVX1OR2-NEXT: divl %ecx +; AVX1OR2-NEXT: vzeroupper +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: PR44139: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastsd (%rdi), %zmm0 +; AVX512-NEXT: movl (%rdi), %eax +; AVX512-NEXT: vmovaps %zmm0, (%rdi) +; AVX512-NEXT: vmovaps %zmm0, 64(%rdi) +; AVX512-NEXT: leal 2147483647(%rax), %ecx +; AVX512-NEXT: testl %eax, %eax +; AVX512-NEXT: cmovnsl %eax, %ecx +; AVX512-NEXT: andl $-2147483648, %ecx # imm = 0x80000000 +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: xorl %edx, %edx +; AVX512-NEXT: divl %ecx +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq ; ; X86AVX2-LABEL: PR44139: ; X86AVX2: # %bb.0: -; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86AVX2-NEXT: movl (%eax), %eax +; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86AVX2-NEXT: movl (%ecx), %eax +; X86AVX2-NEXT: 
vbroadcastsd (%ecx), %ymm0 +; X86AVX2-NEXT: vmovaps %ymm0, 64(%ecx) +; X86AVX2-NEXT: vmovaps %ymm0, 96(%ecx) +; X86AVX2-NEXT: vmovaps %ymm0, (%ecx) +; X86AVX2-NEXT: vmovaps %ymm0, 32(%ecx) ; X86AVX2-NEXT: leal 2147483647(%eax), %ecx ; X86AVX2-NEXT: testl %eax, %eax ; X86AVX2-NEXT: cmovnsl %eax, %ecx @@ -2307,6 +2344,7 @@ ; X86AVX2-NEXT: addl %eax, %ecx ; X86AVX2-NEXT: xorl %edx, %edx ; X86AVX2-NEXT: divl %ecx +; X86AVX2-NEXT: vzeroupper ; X86AVX2-NEXT: retl %L = load <16 x i64>, ptr %p %E1 = extractelement <16 x i64> %L, i64 0 diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll --- a/llvm/test/CodeGen/X86/var-permute-128.ll +++ b/llvm/test/CodeGen/X86/var-permute-128.ll @@ -129,7 +129,7 @@ define <8 x i16> @var_shuffle_v8i16(<8 x i16> %v, <8 x i16> %indices) nounwind { ; SSE3-LABEL: var_shuffle_v8i16: ; SSE3: # %bb.0: -; SSE3-NEXT: movd %xmm1, %eax +; SSE3-NEXT: pextrw $0, %xmm1, %eax ; SSE3-NEXT: pextrw $1, %xmm1, %ecx ; SSE3-NEXT: pextrw $2, %xmm1, %edx ; SSE3-NEXT: pextrw $3, %xmm1, %esi diff --git a/llvm/test/CodeGen/X86/var-permute-512.ll b/llvm/test/CodeGen/X86/var-permute-512.ll --- a/llvm/test/CodeGen/X86/var-permute-512.ll +++ b/llvm/test/CodeGen/X86/var-permute-512.ll @@ -101,7 +101,7 @@ ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3 ; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; AVX512F-NEXT: vmovd %xmm4, %eax +; AVX512F-NEXT: vpextrw $0, %xmm4, %eax ; AVX512F-NEXT: vmovaps %zmm0, (%rsp) ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax @@ -127,7 +127,7 @@ ; AVX512F-NEXT: vpextrw $7, %xmm4, %eax ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0 -; AVX512F-NEXT: vmovd %xmm3, %eax +; AVX512F-NEXT: vpextrw $0, %xmm3, %eax ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -152,7 +152,7 @@ ; AVX512F-NEXT: vpextrw $7, %xmm3, %eax ; AVX512F-NEXT: 
andl $31, %eax ; AVX512F-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm4, %xmm3 -; AVX512F-NEXT: vmovd %xmm2, %eax +; AVX512F-NEXT: vpextrw $0, %xmm2, %eax ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -180,7 +180,7 @@ ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax ; AVX512F-NEXT: vpinsrw $7, %eax, %xmm4, %xmm2 -; AVX512F-NEXT: vmovd %xmm1, %eax +; AVX512F-NEXT: vpextrw $0, %xmm1, %eax ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -330,7 +330,7 @@ ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3 ; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; AVX512F-NEXT: vmovd %xmm4, %eax +; AVX512F-NEXT: vpextrb $0, %xmm4, %eax ; AVX512F-NEXT: vmovaps %zmm0, (%rsp) ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax @@ -380,7 +380,7 @@ ; AVX512F-NEXT: vpextrb $15, %xmm4, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0 -; AVX512F-NEXT: vmovd %xmm3, %eax +; AVX512F-NEXT: vpextrb $0, %xmm3, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -432,7 +432,7 @@ ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax ; AVX512F-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3 -; AVX512F-NEXT: vmovd %xmm2, %eax +; AVX512F-NEXT: vpextrb $0, %xmm2, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -485,7 +485,7 @@ ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax ; AVX512F-NEXT: vpinsrb $15, %eax, %xmm4, %xmm2 -; AVX512F-NEXT: vmovd %xmm1, %eax +; AVX512F-NEXT: vpextrb $0, %xmm1, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -555,7 +555,7 @@ ; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX512BW-NEXT: vextracti32x4 
$2, %zmm1, %xmm3 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; AVX512BW-NEXT: vmovd %xmm4, %eax +; AVX512BW-NEXT: vpextrb $0, %xmm4, %eax ; AVX512BW-NEXT: vmovaps %zmm0, (%rsp) ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax @@ -605,7 +605,7 @@ ; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0 -; AVX512BW-NEXT: vmovd %xmm3, %eax +; AVX512BW-NEXT: vpextrb $0, %xmm3, %eax ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vmovd %eax, %xmm4 @@ -657,7 +657,7 @@ ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3 -; AVX512BW-NEXT: vmovd %xmm2, %eax +; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vmovd %eax, %xmm4 @@ -710,7 +710,7 @@ ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm2 -; AVX512BW-NEXT: vmovd %xmm1, %eax +; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vmovd %eax, %xmm4 diff --git a/llvm/test/CodeGen/X86/vec_extract.ll b/llvm/test/CodeGen/X86/vec_extract.ll --- a/llvm/test/CodeGen/X86/vec_extract.ll +++ b/llvm/test/CodeGen/X86/vec_extract.ll @@ -110,11 +110,15 @@ ; X32-LABEL: ossfuzz15662: ; X32: # %bb.0: ; X32-NEXT: xorps %xmm0, %xmm0 +; X32-NEXT: movaps %xmm0, (%eax) +; X32-NEXT: xorps %xmm0, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: ossfuzz15662: ; X64: # %bb.0: ; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, (%rax) +; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: retq %C10 = icmp ule i1 false, false %C3 = icmp ule i1 true, undef