diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -9690,8 +9690,8 @@ The first operand of an '``extractelement``' instruction is a value of :ref:`vector ` type. The second operand is an index indicating -the position from which to extract the element. The index may be a -variable of any integer type. +the position from which to extract the element. The index may be a +variable of any integer type, and will be treated as an unsigned integer. Semantics: """""""""" @@ -9735,8 +9735,10 @@ The first operand of an '``insertelement``' instruction is a value of :ref:`vector ` type. The second operand is a scalar value whose type must equal the element type of the first operand. The third operand -is an index indicating the position at which to insert the value. The -index may be a variable of any integer type. +is an index indicating the position at which to insert the value. The index +may be a variable of any integer type, and will be treated as an unsigned +integer. 
+ Semantics: """""""""" diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2808,7 +2808,7 @@ Register Idx; if (auto *CI = dyn_cast(U.getOperand(1))) { if (CI->getBitWidth() != PreferredVecIdxWidth) { - APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth); + APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth); auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx); Idx = getOrCreateVReg(*NewIdxCI); } @@ -2817,7 +2817,7 @@ Idx = getOrCreateVReg(*U.getOperand(1)); if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) { const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth); - Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx).getReg(0); + Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0); } MIRBuilder.buildExtractVectorElement(Res, Val, Idx); return true; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3545,7 +3545,7 @@ const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); - SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), + SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()), @@ -3555,7 +3555,7 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); - SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), + SDValue InIdx = 
DAG.getZExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()), diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -1549,7 +1549,7 @@ ; CHECK-LABEL: name: test_extractelement ; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0 ; CHECK: [[IDX:%[0-9]+]]:_(s32) = COPY $w0 -; CHECK: [[IDXEXT:%[0-9]+]]:_(s64) = G_SEXT [[IDX]] +; CHECK: [[IDXEXT:%[0-9]+]]:_(s64) = G_ZEXT [[IDX]] ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDXEXT]](s64) ; CHECK: $w0 = COPY [[RES]](s32) %res = extractelement <2 x i32> %vec, i32 %idx @@ -1566,6 +1566,27 @@ ret i32 %res } +define i32 @test_extractelement_const_idx_zext_i1(<2 x i32> %vec) { +; CHECK-LABEL: name: test_extractelement_const_idx_zext_i1 +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0 +; CHECK: [[IDX:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s64) +; CHECK: $w0 = COPY [[RES]](s32) + %res = extractelement <2 x i32> %vec, i1 true + ret i32 %res +} + +define i32 @test_extractelement_const_idx_zext_i8(<2 x i32> %vec) { +; CHECK-LABEL: name: test_extractelement_const_idx_zext_i8 +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0 +; CHECK: [[IDX:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s64) +; CHECK: $w0 = COPY [[RES]](s32) + %res = extractelement <2 x i32> %vec, i8 255 + ret i32 %res +} + + define i32 @test_singleelementvector(i32 %elt){ ; CHECK-LABEL: name: test_singleelementvector ; CHECK: [[ELT:%[0-9]+]]:_(s32) = COPY $w0 diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -9143,8 +9143,7 @@ define i32 @load_single_extract_variable_index_v3i32_small_align(<3 x i32>* %A, i32 %idx) { ; CHECK-LABEL: load_single_extract_variable_index_v3i32_small_align: ; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x9, w1 +; CHECK-NEXT: mov w9, w1 ; CHECK-NEXT: mov w8, #2 ; CHECK-NEXT: cmp x9, #2 ; CHECK-NEXT: csel x8, x9, x8, lo @@ -9158,8 +9157,7 @@ define i32 @load_single_extract_variable_index_v3i32_default_align(<3 x i32>* %A, i32 %idx) { ; CHECK-LABEL: load_single_extract_variable_index_v3i32_default_align: ; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x9, w1 +; CHECK-NEXT: mov w9, w1 ; CHECK-NEXT: mov w8, #2 ; CHECK-NEXT: cmp x9, #2 ; CHECK-NEXT: csel x8, x9, x8, lo diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll --- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll @@ -292,8 +292,7 @@ define i8 @test_lanex_16xi8( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.b, xzr, x8 ; CHECK-NEXT: lastb w0, p0, z0.b ; CHECK-NEXT: ret @@ -304,8 +303,7 @@ define i16 @test_lanex_8xi16( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_8xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.h, xzr, x8 ; CHECK-NEXT: lastb w0, p0, z0.h ; CHECK-NEXT: ret @@ -316,8 +314,7 @@ define i32 @test_lanex_4xi32( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_4xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 
+; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.s, xzr, x8 ; CHECK-NEXT: lastb w0, p0, z0.s ; CHECK-NEXT: ret @@ -328,8 +325,7 @@ define i64 @test_lanex_2xi64( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.d, xzr, x8 ; CHECK-NEXT: lastb x0, p0, z0.d ; CHECK-NEXT: ret @@ -340,8 +336,7 @@ define half @test_lanex_8xf16( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_8xf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.h, xzr, x8 ; CHECK-NEXT: lastb h0, p0, z0.h ; CHECK-NEXT: ret @@ -352,8 +347,7 @@ define half @test_lanex_4xf16( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_4xf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.s, xzr, x8 ; CHECK-NEXT: lastb h0, p0, z0.h ; CHECK-NEXT: ret @@ -364,8 +358,7 @@ define half @test_lanex_2xf16( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_2xf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.d, xzr, x8 ; CHECK-NEXT: lastb h0, p0, z0.h ; CHECK-NEXT: ret @@ -376,8 +369,7 @@ define float @test_lanex_4xf32( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_4xf32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.s, xzr, x8 ; CHECK-NEXT: lastb s0, p0, z0.s ; CHECK-NEXT: ret @@ -388,8 +380,7 @@ define float @test_lanex_2xf32( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_2xf32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.d, xzr, x8 ; CHECK-NEXT: lastb s0, p0, z0.s ; CHECK-NEXT: ret @@ -400,8 +391,7 @@ 
define double @test_lanex_2xf64( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_2xf64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.d, xzr, x8 ; CHECK-NEXT: lastb d0, p0, z0.d ; CHECK-NEXT: ret @@ -518,8 +508,7 @@ define i1 @test_lanex_4xi1( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_4xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 ; CHECK-NEXT: whilels p0.s, xzr, x8 ; CHECK-NEXT: lastb w8, p0, z0.s diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll --- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -128,8 +128,7 @@ define @test_lanex_16xi8( %a, i32 %x) { ; CHECK-LABEL: test_lanex_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov w9, #30 ; CHECK-NEXT: index z2.b, #0, #1 ; CHECK-NEXT: ptrue p0.b @@ -389,8 +388,7 @@ define @test_predicate_insert_8xi1_immediate ( %val, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_8xi1_immediate: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov w9, #1 ; CHECK-NEXT: index z1.h, #0, #1 ; CHECK-NEXT: ptrue p1.h @@ -427,8 +425,7 @@ define @test_predicate_insert_2xi1( %val, i1 %elt, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_2xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov w8, w1 ; CHECK-NEXT: index z1.d, #0, #1 ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -446,8 +443,7 @@ define @test_predicate_insert_4xi1( %val, i1 %elt, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_4xi1: ; CHECK: // %bb.0: -; 
CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov w8, w1 ; CHECK-NEXT: index z1.s, #0, #1 ; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, w8 @@ -463,8 +459,7 @@ define @test_predicate_insert_8xi1( %val, i1 %elt, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_8xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov w8, w1 ; CHECK-NEXT: index z1.h, #0, #1 ; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z0.h, w8 @@ -481,8 +476,7 @@ define @test_predicate_insert_16xi1( %val, i1 %elt, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_16xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov w8, w1 ; CHECK-NEXT: index z1.b, #0, #1 ; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: mov z0.b, w8 @@ -505,8 +499,7 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x9, w1 +; CHECK-NEXT: mov w9, w1 ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 ; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1b { z0.b }, p1, [sp, #1, mul vl] diff --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll --- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll @@ -6,8 +6,7 @@ define i32 @promote_extract_2i32_idx( %a, i32 %idx) { ; CHECK-LABEL: promote_extract_2i32_idx: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.d, xzr, x8 ; CHECK-NEXT: lastb x0, p0, z0.d ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -25,8 +24,7 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 
0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1b { z0.b }, p0, [sp] @@ -51,8 +49,7 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1h { z0.h }, p0, [sp] @@ -77,8 +74,7 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: cnth x8 -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmp x9, x8 @@ -103,8 +99,7 @@ ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: cnth x8 -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmp x9, x8 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -march=amdgcn -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s + +define i8 @f_i1_1() { + ; 
CHECK-LABEL: name: f_i1_1 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<256 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x s8>), [[C]](s32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %E1 = extractelement <256 x i8> undef, i1 true + ret i8 %E1 +} + +define i8 @f_i8_255() { + ; CHECK-LABEL: name: f_i8_255 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<256 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x s8>), [[C]](s32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %E1 = extractelement <256 x i8> undef, i8 255 + ret i8 %E1 +} diff --git a/llvm/test/CodeGen/Mips/msa/basic_operations.ll b/llvm/test/CodeGen/Mips/msa/basic_operations.ll --- a/llvm/test/CodeGen/Mips/msa/basic_operations.ll +++ b/llvm/test/CodeGen/Mips/msa/basic_operations.ll @@ -1315,7 +1315,7 @@ ; N64-NEXT: ld.b $w0, 0($2) ; N64-NEXT: addv.b $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.b $w0, $w0[$1] ; N64-NEXT: mfc1 $1, $f0 ; N64-NEXT: sra $1, $1, 24 @@ -1371,7 +1371,7 @@ ; N64-NEXT: ld.h $w0, 0($2) ; N64-NEXT: addv.h $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.h $w0, $w0[$1] ; N64-NEXT: mfc1 $1, $f0 ; N64-NEXT: sra $1, $1, 16 @@ -1423,7 +1423,7 @@ ; N64-NEXT: ld.w $w0, 0($2) ; N64-NEXT: addv.w $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.w $w0, $w0[$1] ; N64-NEXT: jr $ra ; N64-NEXT: mfc1 $2, $f0 @@ 
-1495,7 +1495,7 @@ ; N64-NEXT: ld.d $w0, 0($2) ; N64-NEXT: addv.d $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.d $w0, $w0[$1] ; N64-NEXT: jr $ra ; N64-NEXT: dmfc1 $2, $f0 @@ -1546,7 +1546,7 @@ ; N64-NEXT: ld.b $w0, 0($2) ; N64-NEXT: addv.b $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.b $w0, $w0[$1] ; N64-NEXT: mfc1 $1, $f0 ; N64-NEXT: jr $ra @@ -1599,7 +1599,7 @@ ; N64-NEXT: ld.h $w0, 0($2) ; N64-NEXT: addv.h $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.h $w0, $w0[$1] ; N64-NEXT: mfc1 $1, $f0 ; N64-NEXT: jr $ra @@ -1650,7 +1650,7 @@ ; N64-NEXT: ld.w $w0, 0($2) ; N64-NEXT: addv.w $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.w $w0, $w0[$1] ; N64-NEXT: jr $ra ; N64-NEXT: mfc1 $2, $f0 @@ -1722,7 +1722,7 @@ ; N64-NEXT: ld.d $w0, 0($2) ; N64-NEXT: addv.d $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.d $w0, $w0[$1] ; N64-NEXT: jr $ra ; N64-NEXT: dmfc1 $2, $f0 @@ -1934,7 +1934,7 @@ ; N64-NEXT: daddu $1, $1, $25 ; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8_vidx))) ; N64-NEXT: ld $2, %got_disp(i32)($1) -; N64-NEXT: lw $2, 0($2) +; N64-NEXT: lwu $2, 0($2) ; N64-NEXT: ld $1, %got_disp(v16i8)($1) ; N64-NEXT: ld.b $w0, 0($1) ; N64-NEXT: sld.b $w0, $w0[$2] @@ -1994,7 +1994,7 @@ ; N64-NEXT: daddu $1, $1, $25 ; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16_vidx))) ; N64-NEXT: ld $2, %got_disp(i32)($1) -; N64-NEXT: lw $2, 0($2) +; N64-NEXT: lwu $2, 0($2) ; N64-NEXT: ld $1, %got_disp(v8i16)($1) ; N64-NEXT: ld.h $w0, 0($1) ; N64-NEXT: dsll $2, $2, 1 @@ -2055,7 +2055,7 @@ ; N64-NEXT: daddu $1, $1, $25 ; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32_vidx))) ; N64-NEXT: ld $2, 
%got_disp(i32)($1) -; N64-NEXT: lw $2, 0($2) +; N64-NEXT: lwu $2, 0($2) ; N64-NEXT: ld $1, %got_disp(v4i32)($1) ; N64-NEXT: ld.w $w0, 0($1) ; N64-NEXT: dsll $2, $2, 2 @@ -2124,7 +2124,7 @@ ; N64-NEXT: daddu $1, $1, $25 ; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64_vidx))) ; N64-NEXT: ld $2, %got_disp(i32)($1) -; N64-NEXT: lw $2, 0($2) +; N64-NEXT: lwu $2, 0($2) ; N64-NEXT: ld $1, %got_disp(v2i64)($1) ; N64-NEXT: ld.d $w0, 0($1) ; N64-NEXT: dsll $2, $2, 3 diff --git a/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll b/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll --- a/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll +++ b/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll @@ -193,10 +193,9 @@ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) %4 = extractelement <4 x float> %2, i32 %3 - ; ALL-DAG: splat.w $w0, [[R1]][[[IDX]]] + ; ALL-DAG: splat.w $w0, [[R1]][[[PTR_I]]] ret float %4 } @@ -259,10 +258,9 @@ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) %4 = extractelement <2 x double> %2, i32 %3 - ; ALL-DAG: splat.d $w0, [[R1]][[[IDX]]] + ; ALL-DAG: splat.d $w0, [[R1]][[[PTR_I]]] ret double %4 } @@ -312,11 +310,10 @@ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) %3 = insertelement <4 x float> %1, float %a, i32 %2 ; float argument passed in $f12 - ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2 + ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[PTR_I]], 2 ; ALL-DAG: sld.b [[R1]], [[R1]][[[BIDX]]] ; ALL-DAG: insve.w [[R1]][0], $w12[0] ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]] @@ -341,11 +338,10 @@ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], 
%got(i32)( ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) %3 = insertelement <2 x double> %1, double %a, i32 %2 ; double argument passed in $f12 - ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 3 + ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[PTR_I]], 3 ; ALL-DAG: sld.b [[R1]], [[R1]][[[BIDX]]] ; ALL-DAG: insve.d [[R1]][0], $w12[0] ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]] diff --git a/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll --- a/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll +++ b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll @@ -715,7 +715,6 @@ define double @conv2dlbTestuiVar(<4 x i32> %a, i32 zeroext %elem) { ; CHECK-64-LABEL: conv2dlbTestuiVar: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: extsw 3, 3 ; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-64-NEXT: vextuwlx 3, 3, 2 ; CHECK-64-NEXT: mtfprwz 0, 3 diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll --- a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll @@ -5,6 +5,7 @@ define zeroext i8 @test1(<16 x i8> %a, i32 signext %index) { ; CHECK-64-LABEL: test1: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: vextublx 3, 3, 2 ; CHECK-64-NEXT: clrldi 3, 3, 56 ; CHECK-64-NEXT: blr @@ -24,6 +25,7 @@ define signext i8 @test2(<16 x i8> %a, i32 signext %index) { ; CHECK-64-LABEL: test2: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: vextublx 3, 3, 2 ; CHECK-64-NEXT: extsb 3, 3 ; CHECK-64-NEXT: blr @@ -44,6 +46,7 @@ define zeroext i16 @test3(<8 x i16> %a, i32 signext %index) { ; CHECK-64-LABEL: test3: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-64-NEXT: vextuhlx 3, 3, 2 ; CHECK-64-NEXT: 
clrldi 3, 3, 48 @@ -64,6 +67,7 @@ define signext i16 @test4(<8 x i16> %a, i32 signext %index) { ; CHECK-64-LABEL: test4: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-64-NEXT: vextuhlx 3, 3, 2 ; CHECK-64-NEXT: extsh 3, 3 @@ -84,6 +88,7 @@ define zeroext i32 @test5(<4 x i32> %a, i32 signext %index) { ; CHECK-64-LABEL: test5: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-64-NEXT: vextuwlx 3, 3, 2 ; CHECK-64-NEXT: blr @@ -103,6 +108,7 @@ define signext i32 @test6(<4 x i32> %a, i32 signext %index) { ; CHECK-64-LABEL: test6: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-64-NEXT: vextuwlx 3, 3, 2 ; CHECK-64-NEXT: extsw 3, 3 diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll --- a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll @@ -5,6 +5,7 @@ define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, i8 zeroext %c) { ; CHECK-64-LABEL: test_add1: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: vextublx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 ; CHECK-64-NEXT: clrldi 3, 3, 56 @@ -31,6 +32,7 @@ define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) { ; CHECK-64-LABEL: test_add2: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: vextublx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 ; CHECK-64-NEXT: extsb 3, 3 @@ -57,6 +59,7 @@ define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) { ; CHECK-64-LABEL: test_add3: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-64-NEXT: vextuhlx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 @@ -84,6 +87,7 @@ define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 
signext %c) { ; CHECK-64-LABEL: test_add4: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-64-NEXT: vextuhlx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 @@ -111,6 +115,7 @@ define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) { ; CHECK-64-LABEL: test_add5: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-64-NEXT: vextuwlx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 @@ -134,6 +139,7 @@ define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) { ; CHECK-64-LABEL: test_add6: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-64-NEXT: vextuwlx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll --- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll @@ -266,8 +266,8 @@ define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) { ; CHECK-64-LABEL: testFloat1: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-DAG: rlwinm 3, 4, 2, 28, 29 -; CHECK-64-DAG: addi 4, 1, -16 +; CHECK-64-NEXT: rlwinm 3, 4, 2, 28, 29 +; CHECK-64-NEXT: addi 4, 1, -16 ; CHECK-64-NEXT: stxv 34, -16(1) ; CHECK-64-NEXT: stfsx 1, 4, 3 ; CHECK-64-NEXT: lxv 34, -16(1) @@ -285,8 +285,7 @@ ; CHECK-64-P10-LABEL: testFloat1: ; CHECK-64-P10: # %bb.0: # %entry ; CHECK-64-P10-NEXT: xscvdpspn 35, 1 -; CHECK-64-P10-NEXT: extsw 3, 4 -; CHECK-64-P10-NEXT: slwi 3, 3, 2 +; CHECK-64-P10-NEXT: slwi 3, 4, 2 ; CHECK-64-P10-NEXT: vinswvlx 2, 3, 3 ; CHECK-64-P10-NEXT: blr ; @@ -305,16 +304,16 @@ ; CHECK-64-LABEL: testFloat2: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: lwz 6, 0(3) -; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29 -; CHECK-64-DAG: addi 7, 1, -16 +; CHECK-64-NEXT: addi 7, 1, -16 +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 ; CHECK-64-NEXT: stxv 
34, -16(1) +; CHECK-64-NEXT: rlwinm 5, 5, 2, 28, 29 ; CHECK-64-NEXT: stwx 6, 7, 4 -; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 -; CHECK-64-NEXT: addi 5, 1, -32 +; CHECK-64-NEXT: addi 4, 1, -32 ; CHECK-64-NEXT: lxv 0, -16(1) ; CHECK-64-NEXT: lwz 3, 1(3) ; CHECK-64-NEXT: stxv 0, -32(1) -; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: stwx 3, 4, 5 ; CHECK-64-NEXT: lxv 34, -32(1) ; CHECK-64-NEXT: blr ; @@ -337,12 +336,10 @@ ; CHECK-64-P10-LABEL: testFloat2: ; CHECK-64-P10: # %bb.0: # %entry ; CHECK-64-P10-NEXT: lwz 6, 0(3) -; CHECK-64-P10-NEXT: extsw 4, 4 ; CHECK-64-P10-NEXT: lwz 3, 1(3) ; CHECK-64-P10-NEXT: slwi 4, 4, 2 ; CHECK-64-P10-NEXT: vinswlx 2, 4, 6 -; CHECK-64-P10-NEXT: extsw 4, 5 -; CHECK-64-P10-NEXT: slwi 4, 4, 2 +; CHECK-64-P10-NEXT: slwi 4, 5, 2 ; CHECK-64-P10-NEXT: vinswlx 2, 4, 3 ; CHECK-64-P10-NEXT: blr ; @@ -370,8 +367,9 @@ ; CHECK-64-LABEL: testFloat3: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: lis 6, 1 -; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29 -; CHECK-64-DAG: addi 7, 1, -16 +; CHECK-64-NEXT: addi 7, 1, -16 +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-64-NEXT: rlwinm 5, 5, 2, 28, 29 ; CHECK-64-NEXT: lwzx 6, 3, 6 ; CHECK-64-NEXT: stxv 34, -16(1) ; CHECK-64-NEXT: stwx 6, 7, 4 @@ -379,10 +377,9 @@ ; CHECK-64-NEXT: lxv 0, -16(1) ; CHECK-64-NEXT: rldic 4, 4, 36, 27 ; CHECK-64-NEXT: lwzx 3, 3, 4 -; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 -; CHECK-64-NEXT: addi 5, 1, -32 +; CHECK-64-NEXT: addi 4, 1, -32 ; CHECK-64-NEXT: stxv 0, -32(1) -; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: stwx 3, 4, 5 ; CHECK-64-NEXT: lxv 34, -32(1) ; CHECK-64-NEXT: blr ; @@ -406,14 +403,12 @@ ; CHECK-64-P10-LABEL: testFloat3: ; CHECK-64-P10: # %bb.0: # %entry ; CHECK-64-P10-NEXT: plwz 6, 65536(3), 0 -; CHECK-64-P10-NEXT: extsw 4, 4 ; CHECK-64-P10-NEXT: slwi 4, 4, 2 ; CHECK-64-P10-NEXT: vinswlx 2, 4, 6 ; CHECK-64-P10-NEXT: li 4, 1 ; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27 ; CHECK-64-P10-NEXT: lwzx 3, 3, 4 -; CHECK-64-P10-NEXT: extsw 4, 5 -; CHECK-64-P10-NEXT: slwi 4, 4, 2 +; 
CHECK-64-P10-NEXT: slwi 4, 5, 2 ; CHECK-64-P10-NEXT: vinswlx 2, 4, 3 ; CHECK-64-P10-NEXT: blr ; @@ -580,7 +575,7 @@ define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) { ; CHECK-64-LABEL: testDouble1: ; CHECK-64: # %bb.0: # %entry -; CHECK-64: rlwinm 3, 4, 3, 28, 28 +; CHECK-64-NEXT: rlwinm 3, 4, 3, 28, 28 ; CHECK-64-NEXT: addi 4, 1, -16 ; CHECK-64-NEXT: stxv 34, -16(1) ; CHECK-64-NEXT: stfdx 1, 4, 3 @@ -598,7 +593,6 @@ ; ; CHECK-64-P10-LABEL: testDouble1: ; CHECK-64-P10: # %bb.0: # %entry -; CHECK-64-P10-NEXT: extsw 4, 4 ; CHECK-64-P10-NEXT: mffprd 3, 1 ; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 ; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3 @@ -606,8 +600,8 @@ ; ; CHECK-32-P10-LABEL: testDouble1: ; CHECK-32-P10: # %bb.0: # %entry -; CHECK-32-P10-DAG: addi 4, 1, -16 -; CHECK-32-P10-DAG: rlwinm 3, 5, 3, 28, 28 +; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28 +; CHECK-32-P10-NEXT: addi 4, 1, -16 ; CHECK-32-P10-NEXT: stxv 34, -16(1) ; CHECK-32-P10-NEXT: stfdx 1, 4, 3 ; CHECK-32-P10-NEXT: lxv 34, -16(1) @@ -621,17 +615,17 @@ ; CHECK-64-LABEL: testDouble2: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: ld 6, 0(3) -; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28 -; CHECK-64-DAG: addi 7, 1, -32 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 ; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-64-NEXT: stdx 6, 7, 4 ; CHECK-64-NEXT: li 4, 1 ; CHECK-64-NEXT: lxv 0, -32(1) ; CHECK-64-NEXT: ldx 3, 3, 4 -; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28 -; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: addi 4, 1, -16 ; CHECK-64-NEXT: stxv 0, -16(1) -; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: stdx 3, 4, 5 ; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; @@ -654,20 +648,18 @@ ; CHECK-64-P10-LABEL: testDouble2: ; CHECK-64-P10: # %bb.0: # %entry ; CHECK-64-P10-NEXT: ld 6, 0(3) -; CHECK-64-P10-NEXT: extsw 4, 4 ; CHECK-64-P10-NEXT: pld 3, 1(3), 0 ; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 ; CHECK-64-P10-NEXT: vinsdlx 2, 
4, 6 -; CHECK-64-P10-NEXT: extsw 4, 5 -; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: rlwinm 4, 5, 3, 0, 28 ; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3 ; CHECK-64-P10-NEXT: blr ; ; CHECK-32-P10-LABEL: testDouble2: ; CHECK-32-P10: # %bb.0: # %entry ; CHECK-32-P10-NEXT: lfd 0, 0(3) -; CHECK-32-P10-DAG: addi 6, 1, -32 -; CHECK-32-P10-DAG: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-P10-NEXT: addi 6, 1, -32 ; CHECK-32-P10-NEXT: stxv 34, -32(1) ; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: stfdx 0, 6, 4 @@ -693,8 +685,9 @@ ; CHECK-64-LABEL: testDouble3: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: lis 6, 1 -; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28 -; CHECK-64-DAG: addi 7, 1, -32 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-64-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-64-NEXT: ldx 6, 3, 6 ; CHECK-64-NEXT: stxv 34, -32(1) ; CHECK-64-NEXT: stdx 6, 7, 4 @@ -702,10 +695,9 @@ ; CHECK-64-NEXT: lxv 0, -32(1) ; CHECK-64-NEXT: rldic 4, 4, 36, 27 ; CHECK-64-NEXT: ldx 3, 3, 4 -; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28 -; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: addi 4, 1, -16 ; CHECK-64-NEXT: stxv 0, -16(1) -; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: stdx 3, 4, 5 ; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; @@ -729,22 +721,20 @@ ; CHECK-64-P10-LABEL: testDouble3: ; CHECK-64-P10: # %bb.0: # %entry ; CHECK-64-P10-NEXT: pld 6, 65536(3), 0 -; CHECK-64-P10-NEXT: extsw 4, 4 ; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 ; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6 ; CHECK-64-P10-NEXT: li 4, 1 ; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27 ; CHECK-64-P10-NEXT: ldx 3, 3, 4 -; CHECK-64-P10-NEXT: extsw 4, 5 -; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: rlwinm 4, 5, 3, 0, 28 ; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3 ; CHECK-64-P10-NEXT: blr ; ; CHECK-32-P10-LABEL: testDouble3: ; CHECK-32-P10: # %bb.0: # %entry ; CHECK-32-P10-NEXT: plfd 0, 65536(3), 0 -; CHECK-32-P10-DAG: addi 6, 
1, -32 -; CHECK-32-P10-DAG: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-P10-NEXT: addi 6, 1, -32 ; CHECK-32-P10-NEXT: stxv 34, -32(1) ; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: stfdx 0, 6, 4 diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll --- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -1112,10 +1112,11 @@ define signext i8 @getvelsc(<16 x i8> %vsc, i32 signext %i) { ; CHECK-LABEL: getvelsc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r4, r5, 8 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 7 -; CHECK-NEXT: lvsl v3, 0, r4 -; CHECK-NEXT: andc r3, r3, r5 +; CHECK-NEXT: andi. r5, r4, 8 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 @@ -1126,10 +1127,11 @@ ; CHECK-LE-LABEL: getvelsc: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 8 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 7 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 3 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -1139,10 +1141,11 @@ ; ; CHECK-AIX-LABEL: getvelsc: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 8 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 7 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: andi. 5, 3, 8 ; CHECK-AIX-NEXT: andc 3, 4, 3 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 @@ -1160,10 +1163,11 @@ define zeroext i8 @getveluc(<16 x i8> %vuc, i32 signext %i) { ; CHECK-LABEL: getveluc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. 
r4, r5, 8 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 7 -; CHECK-NEXT: lvsl v3, 0, r4 -; CHECK-NEXT: andc r3, r3, r5 +; CHECK-NEXT: andi. r5, r4, 8 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 @@ -1174,10 +1178,11 @@ ; CHECK-LE-LABEL: getveluc: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 8 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 7 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 3 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -1187,10 +1192,11 @@ ; ; CHECK-AIX-LABEL: getveluc: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 8 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 7 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: andi. 5, 3, 8 ; CHECK-AIX-NEXT: andc 3, 4, 3 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 @@ -1672,12 +1678,13 @@ define signext i16 @getvelss(<8 x i16> %vss, i32 signext %i) { ; CHECK-LABEL: getvelss: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r4, r5, 4 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 3 -; CHECK-NEXT: sldi r4, r4, 1 -; CHECK-NEXT: andc r3, r3, r5 -; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: andi. 
r5, r4, 4 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: sldi r5, r5, 1 ; CHECK-NEXT: sldi r3, r3, 4 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 ; CHECK-NEXT: srd r3, r4, r3 @@ -1687,11 +1694,12 @@ ; CHECK-LE-LABEL: getvelss: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 4 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 1 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 3 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 4 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -1701,12 +1709,13 @@ ; ; CHECK-AIX-LABEL: getvelss: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 4 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 3 -; CHECK-AIX-NEXT: sldi 5, 5, 1 +; CHECK-AIX-NEXT: andi. 5, 3, 4 ; CHECK-AIX-NEXT: andc 3, 4, 3 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: sldi 5, 5, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 4 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 ; CHECK-AIX-NEXT: srd 3, 4, 3 @@ -1723,12 +1732,13 @@ define zeroext i16 @getvelus(<8 x i16> %vus, i32 signext %i) { ; CHECK-LABEL: getvelus: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r4, r5, 4 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 3 -; CHECK-NEXT: sldi r4, r4, 1 -; CHECK-NEXT: andc r3, r3, r5 -; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: andi. 
r5, r4, 4 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: sldi r5, r5, 1 ; CHECK-NEXT: sldi r3, r3, 4 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 ; CHECK-NEXT: srd r3, r4, r3 @@ -1738,11 +1748,12 @@ ; CHECK-LE-LABEL: getvelus: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 4 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 1 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 3 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 4 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -1752,12 +1763,13 @@ ; ; CHECK-AIX-LABEL: getvelus: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 4 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 3 -; CHECK-AIX-NEXT: sldi 5, 5, 1 +; CHECK-AIX-NEXT: andi. 5, 3, 4 ; CHECK-AIX-NEXT: andc 3, 4, 3 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: sldi 5, 5, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 4 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 ; CHECK-AIX-NEXT: srd 3, 4, 3 @@ -1988,12 +2000,13 @@ define signext i32 @getvelsi(<4 x i32> %vsi, i32 signext %i) { ; CHECK-LABEL: getvelsi: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r4, r5, 2 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 1 -; CHECK-NEXT: sldi r4, r4, 2 -; CHECK-NEXT: andc r3, r3, r5 -; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: andi. 
r5, r4, 2 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: sldi r5, r5, 2 ; CHECK-NEXT: sldi r3, r3, 5 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 ; CHECK-NEXT: srd r3, r4, r3 @@ -2003,11 +2016,12 @@ ; CHECK-LE-LABEL: getvelsi: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 2 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 2 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 1 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 5 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -2017,12 +2031,13 @@ ; ; CHECK-AIX-LABEL: getvelsi: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 2 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 1 -; CHECK-AIX-NEXT: sldi 5, 5, 2 +; CHECK-AIX-NEXT: andi. 5, 3, 2 ; CHECK-AIX-NEXT: andc 3, 4, 3 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: sldi 5, 5, 2 ; CHECK-AIX-NEXT: sldi 3, 3, 5 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 ; CHECK-AIX-NEXT: srd 3, 4, 3 @@ -2038,12 +2053,13 @@ define zeroext i32 @getvelui(<4 x i32> %vui, i32 signext %i) { ; CHECK-LABEL: getvelui: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r4, r5, 2 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 1 -; CHECK-NEXT: sldi r4, r4, 2 -; CHECK-NEXT: andc r3, r3, r5 -; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: andi. 
r5, r4, 2 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: sldi r5, r5, 2 ; CHECK-NEXT: sldi r3, r3, 5 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 ; CHECK-NEXT: srd r3, r4, r3 @@ -2053,11 +2069,12 @@ ; CHECK-LE-LABEL: getvelui: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 2 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 2 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 1 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 5 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -2067,12 +2084,13 @@ ; ; CHECK-AIX-LABEL: getvelui: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 2 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 1 -; CHECK-AIX-NEXT: sldi 5, 5, 2 +; CHECK-AIX-NEXT: andi. 5, 3, 2 ; CHECK-AIX-NEXT: andc 3, 4, 3 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: sldi 5, 5, 2 ; CHECK-AIX-NEXT: sldi 3, 3, 5 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 ; CHECK-AIX-NEXT: srd 3, 4, 3 @@ -2186,7 +2204,8 @@ define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) { ; CHECK-LABEL: getvelsl: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r3, r5, 1 +; CHECK-NEXT: clrldi r3, r5, 32 +; CHECK-NEXT: andi. r3, r3, 1 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: lvsl v3, 0, r3 ; CHECK-NEXT: vperm v2, v2, v2, v3 @@ -2196,7 +2215,8 @@ ; CHECK-LE-LABEL: getvelsl: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 1 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 3 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 @@ -2205,6 +2225,7 @@ ; ; CHECK-AIX-LABEL: getvelsl: ; CHECK-AIX: # %bb.0: # %entry +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: andi. 
3, 3, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: lvsl 3, 0, 3 @@ -2221,7 +2242,8 @@ define i64 @getvelul(<2 x i64> %vul, i32 signext %i) { ; CHECK-LABEL: getvelul: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r3, r5, 1 +; CHECK-NEXT: clrldi r3, r5, 32 +; CHECK-NEXT: andi. r3, r3, 1 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: lvsl v3, 0, r3 ; CHECK-NEXT: vperm v2, v2, v2, v3 @@ -2231,7 +2253,8 @@ ; CHECK-LE-LABEL: getvelul: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 1 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 3 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 @@ -2240,6 +2263,7 @@ ; ; CHECK-AIX-LABEL: getvelul: ; CHECK-AIX: # %bb.0: # %entry +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: andi. 3, 3, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: lvsl 3, 0, 3 @@ -2357,7 +2381,7 @@ define float @getvelf(<4 x float> %vf, i32 signext %i) { ; CHECK-LABEL: getvelf: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r5, 2 +; CHECK-NEXT: rldic r3, r5, 2, 30 ; CHECK-NEXT: lvsl v3, 0, r3 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: xscvspdpn f1, v2 @@ -2365,7 +2389,8 @@ ; ; CHECK-LE-LABEL: getvelf: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xori r3, r5, 3 +; CHECK-LE-NEXT: clrldi r3, r5, 32 +; CHECK-LE-NEXT: xori r3, r3, 3 ; CHECK-LE-NEXT: sldi r3, r3, 2 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 @@ -2374,7 +2399,7 @@ ; ; CHECK-AIX-LABEL: getvelf: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: sldi 3, 3, 2 +; CHECK-AIX-NEXT: rldic 3, 3, 2, 30 ; CHECK-AIX-NEXT: lvsl 3, 0, 3 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: xscvspdpn 1, 34 @@ -2436,7 +2461,8 @@ define double @getveld(<2 x double> %vd, i32 signext %i) { ; CHECK-LABEL: getveld: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r3, r5, 1 +; CHECK-NEXT: clrldi r3, r5, 32 +; CHECK-NEXT: andi. 
r3, r3, 1 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: lvsl v3, 0, r3 ; CHECK-NEXT: vperm v2, v2, v2, v3 @@ -2447,7 +2473,8 @@ ; CHECK-LE-LABEL: getveld: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 1 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 3 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 @@ -2457,6 +2484,7 @@ ; ; CHECK-AIX-LABEL: getveld: ; CHECK-AIX: # %bb.0: # %entry +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: andi. 3, 3, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: lvsl 3, 0, 3 diff --git a/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll b/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll --- a/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll +++ b/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -ppc-late-peephole=true < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \ ; RUN: --check-prefix=CHECK-BE @@ -6,109 +7,152 @@ ; Function Attrs: norecurse nounwind readnone define signext i32 @geti(<4 x i32> %a, i32 signext %b) { +; CHECK-LABEL: geti: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 3, 2 +; CHECK-NEXT: clrldi 4, 5, 32 +; CHECK-NEXT: andc 3, 3, 4 +; CHECK-NEXT: sldi 3, 3, 2 +; CHECK-NEXT: lvsl 3, 0, 3 +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: sldi 3, 3, 5 +; CHECK-NEXT: mfvsrd 4, 34 +; CHECK-NEXT: srd 3, 4, 3 +; CHECK-NEXT: extsw 3, 3 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: geti: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: clrldi 4, 5, 32 +; CHECK-BE-NEXT: li 3, 1 +; CHECK-BE-NEXT: andi. 
5, 4, 2 +; CHECK-BE-NEXT: andc 3, 3, 4 +; CHECK-BE-NEXT: sldi 5, 5, 2 +; CHECK-BE-NEXT: sldi 3, 3, 5 +; CHECK-BE-NEXT: lvsl 3, 0, 5 +; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: mfvsrd 4, 34 +; CHECK-BE-NEXT: srd 3, 4, 3 +; CHECK-BE-NEXT: extsw 3, 3 +; CHECK-BE-NEXT: blr +; +; CHECK-P7-LABEL: geti: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: addi 3, 1, -16 +; CHECK-P7-NEXT: rlwinm 4, 5, 2, 28, 29 +; CHECK-P7-NEXT: stxvw4x 34, 0, 3 +; CHECK-P7-NEXT: lwax 3, 3, 4 +; CHECK-P7-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 %b ret i32 %vecext -; CHECK-LABEL: @geti -; CHECK-P7-LABEL: @geti -; CHECK-BE-LABEL: @geti -; CHECK-DAG: li [[TRUNCREG:[0-9]+]], 2 -; CHECK-DAG: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5 -; CHECK-DAG: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 2 -; CHECK-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]] -; CHECK-DAG: vperm [[PERMVEC:[0-9]+]], 2, 2, [[SHMSKREG]] -; CHECK-DAG: li [[ONEREG:[0-9]+]], 1 -; CHECK-DAG: and [[ELEMSREG:[0-9]+]], [[ONEREG]], 5 -; CHECK-DAG: sldi [[SHAMREG:[0-9]+]], [[ELEMSREG]], 5 -; CHECK: mfvsrd [[TOGPR:[0-9]+]], -; CHECK: srd [[RSHREG:[0-9]+]], [[TOGPR]], [[SHAMREG]] -; CHECK: extsw 3, [[RSHREG]] -; CHECK-P7-DAG: rlwinm [[ELEMOFFREG:[0-9]+]], 5, 2, 28, 29 -; CHECK-P7-DAG: stxvw4x 34, -; CHECK-P7: lwax 3, 3, [[ELEMOFFREG]] -; CHECK-BE-DAG: andi. 
[[ANDREG:[0-9]+]], 5, 2 -; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 2 -; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]] -; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] -; CHECK-BE-DAG: li [[IMMREG:[0-9]+]], 1 -; CHECK-BE-DAG: andc [[ANDCREG:[0-9]+]], [[IMMREG]], 5 -; CHECK-BE-DAG: sldi [[SHAMREG:[0-9]+]], [[ANDCREG]], 5 -; CHECK-BE: mfvsrd [[TOGPR:[0-9]+]], -; CHECK-BE: srd [[RSHREG:[0-9]+]], [[TOGPR]], [[SHAMREG]] -; CHECK-BE: extsw 3, [[RSHREG]] } ; Function Attrs: norecurse nounwind readnone define i64 @getl(<2 x i64> %a, i32 signext %b) { +; CHECK-LABEL: getl: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: clrldi 4, 5, 32 +; CHECK-NEXT: andc 3, 3, 4 +; CHECK-NEXT: sldi 3, 3, 3 +; CHECK-NEXT: lvsl 3, 0, 3 +; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: mfvsrd 3, 34 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: getl: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: andi. 3, 3, 1 +; CHECK-BE-NEXT: sldi 3, 3, 3 +; CHECK-BE-NEXT: lvsl 3, 0, 3 +; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: mfvsrd 3, 34 +; CHECK-BE-NEXT: blr +; +; CHECK-P7-LABEL: getl: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: addi 3, 1, -16 +; CHECK-P7-NEXT: rlwinm 4, 5, 3, 28, 28 +; CHECK-P7-NEXT: stxvd2x 34, 0, 3 +; CHECK-P7-NEXT: ldx 3, 3, 4 +; CHECK-P7-NEXT: blr entry: %vecext = extractelement <2 x i64> %a, i32 %b ret i64 %vecext -; CHECK-LABEL: @getl -; CHECK-P7-LABEL: @getl -; CHECK-BE-LABEL: @getl -; CHECK-DAG: li [[TRUNCREG:[0-9]+]], 1 -; CHECK-DAG: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5 -; CHECK-DAG: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 3 -; CHECK-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]] -; CHECK-DAG: vperm [[PERMVEC:[0-9]+]], 2, 2, [[SHMSKREG]] -; CHECK: mfvsrd 3, -; CHECK-P7-DAG: rlwinm [[ELEMOFFREG:[0-9]+]], 5, 3, 28, 28 -; CHECK-P7-DAG: stxvd2x 34, -; CHECK-P7: ldx 3, 3, [[ELEMOFFREG]] -; CHECK-BE-DAG: andi. 
[[ANDREG:[0-9]+]], 5, 1 -; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3 -; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]] -; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] -; CHECK-BE: mfvsrd 3, } ; Function Attrs: norecurse nounwind readnone define float @getf(<4 x float> %a, i32 signext %b) { +; CHECK-LABEL: getf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clrldi 3, 5, 32 +; CHECK-NEXT: xori 3, 3, 3 +; CHECK-NEXT: sldi 3, 3, 2 +; CHECK-NEXT: lvsl 3, 0, 3 +; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: xscvspdpn 1, 34 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: getf: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: rldic 3, 5, 2, 30 +; CHECK-BE-NEXT: lvsl 3, 0, 3 +; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: xscvspdpn 1, 34 +; CHECK-BE-NEXT: blr +; +; CHECK-P7-LABEL: getf: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: addi 3, 1, -16 +; CHECK-P7-NEXT: rlwinm 4, 5, 2, 28, 29 +; CHECK-P7-NEXT: stxvw4x 34, 0, 3 +; CHECK-P7-NEXT: lfsx 1, 3, 4 +; CHECK-P7-NEXT: blr entry: %vecext = extractelement <4 x float> %a, i32 %b ret float %vecext -; CHECK-LABEL: @getf -; CHECK-P7-LABEL: @getf -; CHECK-BE-LABEL: @getf -; CHECK: xori [[TRUNCREG:[0-9]+]], 5, 3 -; CHECK: sldi [[SHIFTREG:[0-9]+]], [[TRUNCREG]], 2 -; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]] -; CHECK: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] -; CHECK: xscvspdpn 1, -; CHECK-P7-DAG: rlwinm [[ELEMOFFREG:[0-9]+]], 5, 2, 28, 29 -; CHECK-P7-DAG: stxvw4x 34, -; CHECK-P7: lfsx 1, 3, [[ELEMOFFREG]] -; CHECK-BE: sldi [[ELNOREG:[0-9]+]], 5, 2 -; CHECK-BE: lvsl [[SHMSKREG:[0-9]+]], 0, [[ELNOREG]] -; CHECK-BE: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] -; CHECK-BE: xscvspdpn 1, } ; Function Attrs: norecurse nounwind readnone define double @getd(<2 x double> %a, i32 signext %b) { +; CHECK-LABEL: getd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: clrldi 4, 5, 32 +; CHECK-NEXT: andc 3, 3, 4 +; CHECK-NEXT: sldi 3, 3, 3 +; CHECK-NEXT: lvsl 3, 0, 3 +; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: 
xxlor 1, 34, 34 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: getd: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: andi. 3, 3, 1 +; CHECK-BE-NEXT: sldi 3, 3, 3 +; CHECK-BE-NEXT: lvsl 3, 0, 3 +; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: xxlor 1, 34, 34 +; CHECK-BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-BE-NEXT: blr +; +; CHECK-P7-LABEL: getd: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT: clrldi 3, 5, 32 +; CHECK-P7-NEXT: andi. 3, 3, 1 +; CHECK-P7-NEXT: sldi 3, 3, 3 +; CHECK-P7-NEXT: lvsl 3, 0, 3 +; CHECK-P7-NEXT: vperm 2, 2, 2, 3 +; CHECK-P7-NEXT: xxlor 1, 34, 34 +; CHECK-P7-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-P7-NEXT: blr entry: %vecext = extractelement <2 x double> %a, i32 %b ret double %vecext -; CHECK-LABEL: @getd -; CHECK-P7-LABEL: @getd -; CHECK-BE-LABEL: @getd -; CHECK: li [[TRUNCREG:[0-9]+]], 1 -; CHECK: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5 -; CHECK: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 3 -; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]] -; CHECK: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] -; FIXME: the instruction below is a redundant regclass copy, to be removed -; CHECK: xxlor 1, -; CHECK-P7-DAG: andi. [[ANDREG:[0-9]+]], 5, 1 -; CHECK-P7-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3 -; CHECK-P7-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]] -; CHECK-P7-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] -; FIXME: the instruction below is a redundant regclass copy, to be removed -; CHECK-P7: xxlor 1, -; CHECK-BE-DAG: andi. 
[[ANDREG:[0-9]+]], 5, 1 -; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3 -; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]] -; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] -; FIXME: the instruction below is a redundant regclass copy, to be removed -; CHECK-BE: xxlor 1, } diff --git a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll --- a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll +++ b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll @@ -5,13 +5,15 @@ define zeroext i8 @test1(<16 x i8> %a, i32 signext %index) { ; CHECK-LE-LABEL: test1: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: vextubrx 3, 5, 2 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: vextubrx 3, 3, 2 ; CHECK-LE-NEXT: clrldi 3, 3, 56 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vextublx 3, 5, 2 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: vextublx 3, 3, 2 ; CHECK-BE-NEXT: clrldi 3, 3, 56 ; CHECK-BE-NEXT: blr @@ -23,13 +25,15 @@ define signext i8 @test2(<16 x i8> %a, i32 signext %index) { ; CHECK-LE-LABEL: test2: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: vextubrx 3, 5, 2 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: vextubrx 3, 3, 2 ; CHECK-LE-NEXT: extsb 3, 3 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vextublx 3, 5, 2 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: vextublx 3, 3, 2 ; CHECK-BE-NEXT: extsb 3, 3 ; CHECK-BE-NEXT: blr @@ -41,14 +45,16 @@ define zeroext i16 @test3(<8 x i16> %a, i32 signext %index) { ; CHECK-LE-LABEL: test3: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-LE-NEXT: vextuhrx 3, 3, 2 ; CHECK-LE-NEXT: clrldi 3, 3, 48 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 
3, 1, 28, 30 ; CHECK-BE-NEXT: vextuhlx 3, 3, 2 ; CHECK-BE-NEXT: clrldi 3, 3, 48 ; CHECK-BE-NEXT: blr @@ -61,14 +67,16 @@ define signext i16 @test4(<8 x i16> %a, i32 signext %index) { ; CHECK-LE-LABEL: test4: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-LE-NEXT: vextuhrx 3, 3, 2 ; CHECK-LE-NEXT: extsh 3, 3 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test4: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-BE-NEXT: vextuhlx 3, 3, 2 ; CHECK-BE-NEXT: extsh 3, 3 ; CHECK-BE-NEXT: blr @@ -81,13 +89,15 @@ define zeroext i32 @test5(<4 x i32> %a, i32 signext %index) { ; CHECK-LE-LABEL: test5: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-LE-NEXT: vextuwrx 3, 3, 2 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test5: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-BE-NEXT: vextuwlx 3, 3, 2 ; CHECK-BE-NEXT: blr @@ -99,14 +109,16 @@ define signext i32 @test6(<4 x i32> %a, i32 signext %index) { ; CHECK-LE-LABEL: test6: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-LE-NEXT: vextuwrx 3, 3, 2 ; CHECK-LE-NEXT: extsw 3, 3 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test6: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-BE-NEXT: vextuwlx 3, 3, 2 ; CHECK-BE-NEXT: extsw 3, 3 ; CHECK-BE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vec_extract_p9_2.ll b/llvm/test/CodeGen/PowerPC/vec_extract_p9_2.ll --- a/llvm/test/CodeGen/PowerPC/vec_extract_p9_2.ll +++ 
b/llvm/test/CodeGen/PowerPC/vec_extract_p9_2.ll @@ -5,14 +5,16 @@ define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, i8 zeroext %c) { ; CHECK-LE-LABEL: test_add1: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: vextubrx 3, 5, 2 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: vextubrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: clrldi 3, 3, 56 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test_add1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vextublx 3, 5, 2 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: vextublx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: clrldi 3, 3, 56 ; CHECK-BE-NEXT: blr @@ -28,14 +30,16 @@ define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) { ; CHECK-LE-LABEL: test_add2: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: vextubrx 3, 5, 2 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: vextubrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: extsb 3, 3 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test_add2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vextublx 3, 5, 2 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: vextublx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: extsb 3, 3 ; CHECK-BE-NEXT: blr @@ -51,7 +55,8 @@ define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) { ; CHECK-LE-LABEL: test_add3: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-LE-NEXT: vextuhrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: clrldi 3, 3, 48 @@ -59,7 +64,8 @@ ; ; CHECK-BE-LABEL: test_add3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-BE-NEXT: vextuhlx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: clrldi 3, 3, 48 @@ -76,7 +82,8 @@ define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) { ; 
CHECK-LE-LABEL: test_add4: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-LE-NEXT: vextuhrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: extsh 3, 3 @@ -84,7 +91,8 @@ ; ; CHECK-BE-LABEL: test_add4: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-BE-NEXT: vextuhlx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: extsh 3, 3 @@ -101,7 +109,8 @@ define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) { ; CHECK-LE-LABEL: test_add5: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-LE-NEXT: vextuwrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: clrldi 3, 3, 32 @@ -109,7 +118,8 @@ ; ; CHECK-BE-LABEL: test_add5: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-BE-NEXT: vextuwlx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: clrldi 3, 3, 32 @@ -123,7 +133,8 @@ define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) { ; CHECK-LE-LABEL: test_add6: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-LE-NEXT: vextuwrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: extsw 3, 3 @@ -131,7 +142,8 @@ ; ; CHECK-BE-LABEL: test_add6: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-BE-NEXT: vextuwlx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: extsw 3, 3 diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll --- 
a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll @@ -353,16 +353,14 @@ ; CHECK-LABEL: testFloat1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xscvdpspn v3, f1 -; CHECK-NEXT: extsw r3, r6 -; CHECK-NEXT: slwi r3, r3, 2 +; CHECK-NEXT: slwi r3, r6, 2 ; CHECK-NEXT: vinswvrx v2, r3, v3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testFloat1: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xscvdpspn v3, f1 -; CHECK-BE-NEXT: extsw r3, r6 -; CHECK-BE-NEXT: slwi r3, r3, 2 +; CHECK-BE-NEXT: slwi r3, r6, 2 ; CHECK-BE-NEXT: vinswvlx v2, r3, v3 ; CHECK-BE-NEXT: blr ; @@ -392,74 +390,54 @@ ; CHECK-LABEL: testFloat2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lwz r3, 0(r5) -; CHECK-NEXT: extsw r4, r6 -; CHECK-NEXT: slwi r4, r4, 2 +; CHECK-NEXT: slwi r4, r6, 2 ; CHECK-NEXT: vinswrx v2, r4, r3 ; CHECK-NEXT: lwz r3, 1(r5) -; CHECK-NEXT: extsw r4, r7 -; CHECK-NEXT: slwi r4, r4, 2 +; CHECK-NEXT: slwi r4, r7, 2 ; CHECK-NEXT: vinswrx v2, r4, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testFloat2: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lwz r3, 0(r5) -; CHECK-BE-NEXT: extsw r4, r6 -; CHECK-BE-NEXT: slwi r4, r4, 2 +; CHECK-BE-NEXT: slwi r4, r6, 2 ; CHECK-BE-NEXT: vinswlx v2, r4, r3 ; CHECK-BE-NEXT: lwz r3, 1(r5) -; CHECK-BE-NEXT: extsw r4, r7 -; CHECK-BE-NEXT: slwi r4, r4, 2 +; CHECK-BE-NEXT: slwi r4, r7, 2 ; CHECK-BE-NEXT: vinswlx v2, r4, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testFloat2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lwz r3, 0(r5) ; CHECK-P9-NEXT: rlwinm r4, r6, 2, 28, 29 -; CHECK-P9-NEXT: addi r6, r1, -16 +; CHECK-P9-NEXT: lwz r6, 0(r5) +; CHECK-P9-NEXT: rlwinm r3, r7, 2, 28, 29 +; CHECK-P9-NEXT: addi r7, r1, -16 ; CHECK-P9-NEXT: stxv v2, -16(r1) -; CHECK-P9-NEXT: stwx r3, r6, r4 -; CHECK-P9-NEXT: rlwinm r4, r7, 2, 28, 29 +; CHECK-P9-NEXT: stwx r6, r7, r4 ; CHECK-P9-NEXT: lxv vs0, -16(r1) -; CHECK-P9-NEXT: lwz r3, 1(r5) +; CHECK-P9-NEXT: lwz r4, 1(r5) ; CHECK-P9-NEXT: addi r5, r1, -32 ; CHECK-P9-NEXT: stxv vs0, -32(r1) -; 
CHECK-P9-NEXT: stwx r3, r5, r4 +; CHECK-P9-NEXT: stwx r4, r5, r3 ; CHECK-P9-NEXT: lxv v2, -32(r1) ; CHECK-P9-NEXT: blr ; -; AIX-P8-64-LABEL: testFloat2: -; AIX-P8-64: # %bb.0: # %entry -; AIX-P8-64-NEXT: lwz r7, 0(r3) -; AIX-P8-64-NEXT: addi r6, r1, -32 -; AIX-P8-64-NEXT: rlwinm r4, r4, 2, 28, 29 -; AIX-P8-64-NEXT: rlwinm r5, r5, 2, 28, 29 -; AIX-P8-64-NEXT: stxvw4x v2, 0, r6 -; AIX-P8-64-NEXT: stwx r7, r6, r4 -; AIX-P8-64-NEXT: addi r4, r1, -16 -; AIX-P8-64-NEXT: lxvw4x vs0, 0, r6 -; AIX-P8-64-NEXT: lwz r3, 1(r3) -; AIX-P8-64-NEXT: stxvw4x vs0, 0, r4 -; AIX-P8-64-NEXT: stwx r3, r4, r5 -; AIX-P8-64-NEXT: lxvw4x v2, 0, r4 -; AIX-P8-64-NEXT: blr -; -; AIX-P8-32-LABEL: testFloat2: -; AIX-P8-32: # %bb.0: # %entry -; AIX-P8-32-NEXT: lwz r7, 0(r3) -; AIX-P8-32-NEXT: addi r6, r1, -32 -; AIX-P8-32-NEXT: rlwinm r4, r4, 2, 28, 29 -; AIX-P8-32-NEXT: stxvw4x v2, 0, r6 -; AIX-P8-32-NEXT: stwx r7, r6, r4 -; AIX-P8-32-NEXT: rlwinm r4, r5, 2, 28, 29 -; AIX-P8-32-NEXT: addi r5, r1, -16 -; AIX-P8-32-NEXT: lxvw4x vs0, 0, r6 -; AIX-P8-32-NEXT: lwz r3, 1(r3) -; AIX-P8-32-NEXT: stxvw4x vs0, 0, r5 -; AIX-P8-32-NEXT: stwx r3, r5, r4 -; AIX-P8-32-NEXT: lxvw4x v2, 0, r5 -; AIX-P8-32-NEXT: blr +; AIX-P8-LABEL: testFloat2: +; AIX-P8: # %bb.0: # %entry +; AIX-P8-NEXT: lwz r7, 0(r3) +; AIX-P8-NEXT: addi r6, r1, -32 +; AIX-P8-NEXT: rlwinm r4, r4, 2, 28, 29 +; AIX-P8-NEXT: stxvw4x v2, 0, r6 +; AIX-P8-NEXT: stwx r7, r6, r4 +; AIX-P8-NEXT: rlwinm r4, r5, 2, 28, 29 +; AIX-P8-NEXT: addi r5, r1, -16 +; AIX-P8-NEXT: lxvw4x vs0, 0, r6 +; AIX-P8-NEXT: lwz r3, 1(r3) +; AIX-P8-NEXT: stxvw4x vs0, 0, r5 +; AIX-P8-NEXT: stwx r3, r5, r4 +; AIX-P8-NEXT: lxvw4x v2, 0, r5 +; AIX-P8-NEXT: blr entry: %0 = bitcast i8* %b to float* %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1 @@ -475,13 +453,11 @@ ; CHECK-LABEL: testFloat3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: plwz r3, 65536(r5), 0 -; CHECK-NEXT: extsw r4, r6 -; CHECK-NEXT: slwi r4, r4, 2 +; CHECK-NEXT: slwi r4, r6, 2 ; CHECK-NEXT: vinswrx v2, r4, r3 ; 
CHECK-NEXT: li r3, 1 -; CHECK-NEXT: extsw r4, r7 +; CHECK-NEXT: slwi r4, r7, 2 ; CHECK-NEXT: rldic r3, r3, 36, 27 -; CHECK-NEXT: slwi r4, r4, 2 ; CHECK-NEXT: lwzx r3, r5, r3 ; CHECK-NEXT: vinswrx v2, r4, r3 ; CHECK-NEXT: blr @@ -489,42 +465,39 @@ ; CHECK-BE-LABEL: testFloat3: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: plwz r3, 65536(r5), 0 -; CHECK-BE-NEXT: extsw r4, r6 -; CHECK-BE-NEXT: slwi r4, r4, 2 +; CHECK-BE-NEXT: slwi r4, r6, 2 ; CHECK-BE-NEXT: vinswlx v2, r4, r3 ; CHECK-BE-NEXT: li r3, 1 -; CHECK-BE-NEXT: extsw r4, r7 +; CHECK-BE-NEXT: slwi r4, r7, 2 ; CHECK-BE-NEXT: rldic r3, r3, 36, 27 -; CHECK-BE-NEXT: slwi r4, r4, 2 ; CHECK-BE-NEXT: lwzx r3, r5, r3 ; CHECK-BE-NEXT: vinswlx v2, r4, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testFloat3: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lis r3, 1 ; CHECK-P9-NEXT: rlwinm r4, r6, 2, 28, 29 -; CHECK-P9-NEXT: addi r6, r1, -16 -; CHECK-P9-NEXT: lwzx r3, r5, r3 +; CHECK-P9-NEXT: lis r6, 1 +; CHECK-P9-NEXT: rlwinm r3, r7, 2, 28, 29 +; CHECK-P9-NEXT: addi r7, r1, -16 +; CHECK-P9-NEXT: lwzx r6, r5, r6 ; CHECK-P9-NEXT: stxv v2, -16(r1) -; CHECK-P9-NEXT: stwx r3, r6, r4 -; CHECK-P9-NEXT: li r3, 1 -; CHECK-P9-NEXT: rlwinm r4, r7, 2, 28, 29 +; CHECK-P9-NEXT: stwx r6, r7, r4 +; CHECK-P9-NEXT: li r4, 1 ; CHECK-P9-NEXT: lxv vs0, -16(r1) -; CHECK-P9-NEXT: rldic r3, r3, 36, 27 -; CHECK-P9-NEXT: lwzx r3, r5, r3 +; CHECK-P9-NEXT: rldic r4, r4, 36, 27 +; CHECK-P9-NEXT: lwzx r4, r5, r4 ; CHECK-P9-NEXT: addi r5, r1, -32 ; CHECK-P9-NEXT: stxv vs0, -32(r1) -; CHECK-P9-NEXT: stwx r3, r5, r4 +; CHECK-P9-NEXT: stwx r4, r5, r3 ; CHECK-P9-NEXT: lxv v2, -32(r1) ; CHECK-P9-NEXT: blr ; ; AIX-P8-64-LABEL: testFloat3: ; AIX-P8-64: # %bb.0: # %entry ; AIX-P8-64-NEXT: lis r6, 1 -; AIX-P8-64-NEXT: addi r7, r1, -32 ; AIX-P8-64-NEXT: rlwinm r4, r4, 2, 28, 29 -; AIX-P8-64-NEXT: rlwinm r5, r5, 2, 28, 29 +; AIX-P8-64-NEXT: addi r7, r1, -32 ; AIX-P8-64-NEXT: lwzx r6, r3, r6 ; AIX-P8-64-NEXT: stxvw4x v2, 0, r7 ; AIX-P8-64-NEXT: stwx r6, r7, r4 @@ 
-532,10 +505,11 @@ ; AIX-P8-64-NEXT: lxvw4x vs0, 0, r7 ; AIX-P8-64-NEXT: rldic r4, r4, 36, 27 ; AIX-P8-64-NEXT: lwzx r3, r3, r4 -; AIX-P8-64-NEXT: addi r4, r1, -16 -; AIX-P8-64-NEXT: stxvw4x vs0, 0, r4 -; AIX-P8-64-NEXT: stwx r3, r4, r5 -; AIX-P8-64-NEXT: lxvw4x v2, 0, r4 +; AIX-P8-64-NEXT: rlwinm r4, r5, 2, 28, 29 +; AIX-P8-64-NEXT: addi r5, r1, -16 +; AIX-P8-64-NEXT: stxvw4x vs0, 0, r5 +; AIX-P8-64-NEXT: stwx r3, r5, r4 +; AIX-P8-64-NEXT: lxvw4x v2, 0, r5 ; AIX-P8-64-NEXT: blr ; ; AIX-P8-32-LABEL: testFloat3: @@ -758,17 +732,15 @@ define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) { ; CHECK-LABEL: testDouble1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: extsw r4, r6 ; CHECK-NEXT: mffprd r3, f1 -; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-NEXT: rlwinm r4, r6, 3, 0, 28 ; CHECK-NEXT: vinsdrx v2, r4, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testDouble1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: extsw r4, r6 ; CHECK-BE-NEXT: mffprd r3, f1 -; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-BE-NEXT: rlwinm r4, r6, 3, 0, 28 ; CHECK-BE-NEXT: vinsdlx v2, r4, r3 ; CHECK-BE-NEXT: blr ; @@ -807,41 +779,37 @@ ; CHECK-LABEL: testDouble2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: ld r3, 0(r5) -; CHECK-NEXT: extsw r4, r6 -; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-NEXT: rlwinm r4, r6, 3, 0, 28 ; CHECK-NEXT: vinsdrx v2, r4, r3 ; CHECK-NEXT: pld r3, 1(r5), 0 -; CHECK-NEXT: extsw r4, r7 -; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-NEXT: rlwinm r4, r7, 3, 0, 28 ; CHECK-NEXT: vinsdrx v2, r4, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testDouble2: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: ld r3, 0(r5) -; CHECK-BE-NEXT: extsw r4, r6 -; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-BE-NEXT: rlwinm r4, r6, 3, 0, 28 ; CHECK-BE-NEXT: vinsdlx v2, r4, r3 ; CHECK-BE-NEXT: pld r3, 1(r5), 0 -; CHECK-BE-NEXT: extsw r4, r7 -; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-BE-NEXT: rlwinm r4, r7, 3, 0, 28 ; CHECK-BE-NEXT: vinsdlx v2, r4, r3 ; 
CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testDouble2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: ld r3, 0(r5) ; CHECK-P9-NEXT: rlwinm r4, r6, 3, 28, 28 -; CHECK-P9-NEXT: addi r6, r1, -32 +; CHECK-P9-NEXT: ld r6, 0(r5) +; CHECK-P9-NEXT: rlwinm r3, r7, 3, 28, 28 +; CHECK-P9-NEXT: addi r7, r1, -32 ; CHECK-P9-NEXT: stxv v2, -32(r1) -; CHECK-P9-NEXT: stdx r3, r6, r4 -; CHECK-P9-NEXT: li r3, 1 -; CHECK-P9-NEXT: rlwinm r4, r7, 3, 28, 28 +; CHECK-P9-NEXT: stdx r6, r7, r4 +; CHECK-P9-NEXT: li r4, 1 ; CHECK-P9-NEXT: lxv vs0, -32(r1) -; CHECK-P9-NEXT: ldx r3, r5, r3 +; CHECK-P9-NEXT: ldx r4, r5, r4 ; CHECK-P9-NEXT: addi r5, r1, -16 ; CHECK-P9-NEXT: stxv vs0, -16(r1) -; CHECK-P9-NEXT: stdx r3, r5, r4 +; CHECK-P9-NEXT: stdx r4, r5, r3 ; CHECK-P9-NEXT: lxv v2, -16(r1) ; CHECK-P9-NEXT: blr ; @@ -850,16 +818,16 @@ ; AIX-P8-64-NEXT: ld r7, 0(r3) ; AIX-P8-64-NEXT: addi r6, r1, -32 ; AIX-P8-64-NEXT: rlwinm r4, r4, 3, 28, 28 -; AIX-P8-64-NEXT: rlwinm r5, r5, 3, 28, 28 ; AIX-P8-64-NEXT: stxvd2x v2, 0, r6 ; AIX-P8-64-NEXT: stdx r7, r6, r4 ; AIX-P8-64-NEXT: li r4, 1 ; AIX-P8-64-NEXT: lxvd2x vs0, 0, r6 ; AIX-P8-64-NEXT: ldx r3, r3, r4 -; AIX-P8-64-NEXT: addi r4, r1, -16 -; AIX-P8-64-NEXT: stxvd2x vs0, 0, r4 -; AIX-P8-64-NEXT: stdx r3, r4, r5 -; AIX-P8-64-NEXT: lxvd2x v2, 0, r4 +; AIX-P8-64-NEXT: rlwinm r4, r5, 3, 28, 28 +; AIX-P8-64-NEXT: addi r5, r1, -16 +; AIX-P8-64-NEXT: stxvd2x vs0, 0, r5 +; AIX-P8-64-NEXT: stdx r3, r5, r4 +; AIX-P8-64-NEXT: lxvd2x v2, 0, r5 ; AIX-P8-64-NEXT: blr ; ; AIX-P8-32-LABEL: testDouble2: @@ -892,13 +860,11 @@ ; CHECK-LABEL: testDouble3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, 65536(r5), 0 -; CHECK-NEXT: extsw r4, r6 -; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-NEXT: rlwinm r4, r6, 3, 0, 28 ; CHECK-NEXT: vinsdrx v2, r4, r3 ; CHECK-NEXT: li r3, 1 -; CHECK-NEXT: extsw r4, r7 +; CHECK-NEXT: rlwinm r4, r7, 3, 0, 28 ; CHECK-NEXT: rldic r3, r3, 36, 27 -; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28 ; CHECK-NEXT: ldx r3, r5, r3 ; CHECK-NEXT: vinsdrx v2, r4, r3 
; CHECK-NEXT: blr @@ -906,53 +872,51 @@ ; CHECK-BE-LABEL: testDouble3: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: pld r3, 65536(r5), 0 -; CHECK-BE-NEXT: extsw r4, r6 -; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-BE-NEXT: rlwinm r4, r6, 3, 0, 28 ; CHECK-BE-NEXT: vinsdlx v2, r4, r3 ; CHECK-BE-NEXT: li r3, 1 -; CHECK-BE-NEXT: extsw r4, r7 +; CHECK-BE-NEXT: rlwinm r4, r7, 3, 0, 28 ; CHECK-BE-NEXT: rldic r3, r3, 36, 27 -; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28 ; CHECK-BE-NEXT: ldx r3, r5, r3 ; CHECK-BE-NEXT: vinsdlx v2, r4, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testDouble3: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lis r3, 1 ; CHECK-P9-NEXT: rlwinm r4, r6, 3, 28, 28 -; CHECK-P9-NEXT: addi r6, r1, -32 -; CHECK-P9-NEXT: ldx r3, r5, r3 +; CHECK-P9-NEXT: lis r6, 1 +; CHECK-P9-NEXT: rlwinm r3, r7, 3, 28, 28 +; CHECK-P9-NEXT: addi r7, r1, -32 +; CHECK-P9-NEXT: ldx r6, r5, r6 ; CHECK-P9-NEXT: stxv v2, -32(r1) -; CHECK-P9-NEXT: stdx r3, r6, r4 -; CHECK-P9-NEXT: li r3, 1 -; CHECK-P9-NEXT: rlwinm r4, r7, 3, 28, 28 +; CHECK-P9-NEXT: stdx r6, r7, r4 +; CHECK-P9-NEXT: li r4, 1 ; CHECK-P9-NEXT: lxv vs0, -32(r1) -; CHECK-P9-NEXT: rldic r3, r3, 36, 27 -; CHECK-P9-NEXT: ldx r3, r5, r3 +; CHECK-P9-NEXT: rldic r4, r4, 36, 27 +; CHECK-P9-NEXT: ldx r4, r5, r4 ; CHECK-P9-NEXT: addi r5, r1, -16 ; CHECK-P9-NEXT: stxv vs0, -16(r1) -; CHECK-P9-NEXT: stdx r3, r5, r4 +; CHECK-P9-NEXT: stdx r4, r5, r3 ; CHECK-P9-NEXT: lxv v2, -16(r1) ; CHECK-P9-NEXT: blr ; ; AIX-P8-64-LABEL: testDouble3: ; AIX-P8-64: # %bb.0: # %entry ; AIX-P8-64-NEXT: lis r6, 1 -; AIX-P8-64-NEXT: addi r7, r1, -32 ; AIX-P8-64-NEXT: rlwinm r4, r4, 3, 28, 28 +; AIX-P8-64-NEXT: addi r7, r1, -32 ; AIX-P8-64-NEXT: li r8, 1 -; AIX-P8-64-NEXT: rlwinm r5, r5, 3, 28, 28 ; AIX-P8-64-NEXT: ldx r6, r3, r6 ; AIX-P8-64-NEXT: stxvd2x v2, 0, r7 ; AIX-P8-64-NEXT: stdx r6, r7, r4 ; AIX-P8-64-NEXT: rldic r4, r8, 36, 27 ; AIX-P8-64-NEXT: lxvd2x vs0, 0, r7 ; AIX-P8-64-NEXT: ldx r3, r3, r4 -; AIX-P8-64-NEXT: addi r4, r1, -16 -; 
AIX-P8-64-NEXT: stxvd2x vs0, 0, r4 -; AIX-P8-64-NEXT: stdx r3, r4, r5 -; AIX-P8-64-NEXT: lxvd2x v2, 0, r4 +; AIX-P8-64-NEXT: rlwinm r4, r5, 3, 28, 28 +; AIX-P8-64-NEXT: addi r5, r1, -16 +; AIX-P8-64-NEXT: stxvd2x vs0, 0, r5 +; AIX-P8-64-NEXT: stdx r3, r5, r4 +; AIX-P8-64-NEXT: lxvd2x v2, 0, r5 ; AIX-P8-64-NEXT: blr ; ; AIX-P8-32-LABEL: testDouble3: diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll @@ -26,6 +26,8 @@ define half @extractelt_nxv1f16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv1f16_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -58,6 +60,8 @@ define half @extractelt_nxv2f16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv2f16_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -90,6 +94,8 @@ define half @extractelt_nxv4f16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv4f16_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -122,6 +128,8 @@ define half @extractelt_nxv8f16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv8f16_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -154,6 +162,8 @@ define half @extractelt_nxv16f16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv16f16_idx: ; CHECK: # %bb.0: +; 
CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -186,6 +196,8 @@ define half @extractelt_nxv32f16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv32f16_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -218,6 +230,8 @@ define float @extractelt_nxv1f32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv1f32_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -250,6 +264,8 @@ define float @extractelt_nxv2f32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv2f32_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -282,6 +298,8 @@ define float @extractelt_nxv4f32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv4f32_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -314,6 +332,8 @@ define float @extractelt_nxv8f32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv8f32_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -346,6 +366,8 @@ define float @extractelt_nxv16f32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv16f32_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli 
zero, 1, e32, m8, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -378,6 +400,8 @@ define double @extractelt_nxv1f64_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv1f64_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -410,6 +434,8 @@ define double @extractelt_nxv2f64_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv2f64_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -442,6 +468,8 @@ define double @extractelt_nxv4f64_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv4f64_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -474,6 +502,8 @@ define double @extractelt_nxv8f64_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv8f64_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll @@ -26,6 +26,8 @@ define signext i8 @extractelt_nxv1i8_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv1i8_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -58,6 +60,8 @@ define signext i8 @extractelt_nxv2i8_idx( %v, i32 signext 
%idx) { ; CHECK-LABEL: extractelt_nxv2i8_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -90,6 +94,8 @@ define signext i8 @extractelt_nxv4i8_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv4i8_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -122,6 +128,8 @@ define signext i8 @extractelt_nxv8i8_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv8i8_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -154,6 +162,8 @@ define signext i8 @extractelt_nxv16i8_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv16i8_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e8, m2, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -186,6 +196,8 @@ define signext i8 @extractelt_nxv32i8_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv32i8_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e8, m4, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -218,6 +230,8 @@ define signext i8 @extractelt_nxv64i8_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv64i8_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -250,6 +264,8 @@ define signext i16 @extractelt_nxv1i16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv1i16_idx: ; CHECK: # %bb.0: +; 
CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -282,6 +298,8 @@ define signext i16 @extractelt_nxv2i16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv2i16_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -314,6 +332,8 @@ define signext i16 @extractelt_nxv4i16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv4i16_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -346,6 +366,8 @@ define signext i16 @extractelt_nxv8i16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv8i16_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -378,6 +400,8 @@ define signext i16 @extractelt_nxv16i16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv16i16_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -410,6 +434,8 @@ define signext i16 @extractelt_nxv32i16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv32i16_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -442,6 +468,8 @@ define signext i32 @extractelt_nxv1i32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv1i32_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 
32 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -474,6 +502,8 @@ define signext i32 @extractelt_nxv2i32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv2i32_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -506,6 +536,8 @@ define signext i32 @extractelt_nxv4i32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv4i32_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -538,6 +570,8 @@ define signext i32 @extractelt_nxv8i32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv8i32_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -570,6 +604,8 @@ define signext i32 @extractelt_nxv16i32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv16i32_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -602,6 +638,8 @@ define i64 @extractelt_nxv1i64_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv1i64_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -634,6 +672,8 @@ define i64 @extractelt_nxv2i64_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv2i64_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, mu ; CHECK-NEXT: 
vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -666,6 +706,8 @@ define i64 @extractelt_nxv4i64_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv4i64_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -698,6 +740,8 @@ define i64 @extractelt_nxv8i64_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv8i64_idx: ; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -248,43 +248,77 @@ } define i8 @extractelt_v16i8_idx(<16 x i8>* %x, i32 signext %idx) nounwind { -; CHECK-LABEL: extractelt_v16i8_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: extractelt_v16i8_idx: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV32-NEXT: vle8.v v8, (a0) +; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, mu +; RV32-NEXT: vslidedown.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v16i8_idx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64-NEXT: vle8.v v8, (a0) +; RV64-NEXT: slli a0, a1, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, mu +; RV64-NEXT: vslidedown.vx v8, v8, a0 +; RV64-NEXT: vmv.x.s a0, v8 +; RV64-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = extractelement <16 x i8> %a, i32 %idx ret i8 %b } define i16 
@extractelt_v8i16_idx(<8 x i16>* %x, i32 signext %idx) nounwind { -; CHECK-LABEL: extractelt_v8i16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: extractelt_v8i16_idx: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, mu +; RV32-NEXT: vslidedown.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v8i16_idx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: slli a0, a1, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, mu +; RV64-NEXT: vslidedown.vx v8, v8, a0 +; RV64-NEXT: vmv.x.s a0, v8 +; RV64-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = extractelement <8 x i16> %a, i32 %idx ret i16 %b } define i32 @extractelt_v4i32_idx(<4 x i32>* %x, i32 signext %idx) nounwind { -; CHECK-LABEL: extractelt_v4i32_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: extractelt_v4i32_idx: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, mu +; RV32-NEXT: vslidedown.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v4i32_idx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: slli a0, a1, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, mu 
+; RV64-NEXT: vslidedown.vx v8, v8, a0 +; RV64-NEXT: vmv.x.s a0, v8 +; RV64-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = add <4 x i32> %a, %a %c = extractelement <4 x i32> %b, i32 %idx @@ -310,8 +344,10 @@ ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: slli a0, a1, 32 +; RV64-NEXT: srli a0, a0, 32 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV64-NEXT: vslidedown.vx v8, v8, a1 +; RV64-NEXT: vslidedown.vx v8, v8, a0 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x @@ -321,15 +357,27 @@ } define half @extractelt_v8f16_idx(<8 x half>* %x, i32 signext %idx) nounwind { -; CHECK-LABEL: extractelt_v8f16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: extractelt_v8f16_idx: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfadd.vv v8, v8, v8 +; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, mu +; RV32-NEXT: vslidedown.vx v8, v8, a1 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v8f16_idx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfadd.vv v8, v8, v8 +; RV64-NEXT: slli a0, a1, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, mu +; RV64-NEXT: vslidedown.vx v8, v8, a0 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %a = load <8 x half>, <8 x half>* %x %b = fadd <8 x half> %a, %a %c = extractelement <8 x half> %b, i32 %idx @@ -337,15 +385,27 @@ } define float @extractelt_v4f32_idx(<4 x float>* %x, i32 signext %idx) nounwind { -; CHECK-LABEL: extractelt_v4f32_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; 
CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: extractelt_v4f32_idx: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfadd.vv v8, v8, v8 +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, mu +; RV32-NEXT: vslidedown.vx v8, v8, a1 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v4f32_idx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vfadd.vv v8, v8, v8 +; RV64-NEXT: slli a0, a1, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, mu +; RV64-NEXT: vslidedown.vx v8, v8, a0 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %a = load <4 x float>, <4 x float>* %x %b = fadd <4 x float> %a, %a %c = extractelement <4 x float> %b, i32 %idx @@ -353,15 +413,27 @@ } define double @extractelt_v2f64_idx(<2 x double>* %x, i32 signext %idx) nounwind { -; CHECK-LABEL: extractelt_v2f64_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8 -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: extractelt_v2f64_idx: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vfadd.vv v8, v8, v8 +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV32-NEXT: vslidedown.vx v8, v8, a1 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v2f64_idx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfadd.vv v8, v8, v8 +; RV64-NEXT: slli a0, a1, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; 
RV64-NEXT: vslidedown.vx v8, v8, a0 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %a = load <2 x double>, <2 x double>* %x %b = fadd <2 x double> %a, %a %c = extractelement <2 x double> %b, i32 %idx @@ -369,44 +441,79 @@ } define i8 @extractelt_v32i8_idx(<32 x i8>* %x, i32 signext %idx) nounwind { -; CHECK-LABEL: extractelt_v32i8_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 32 -; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, mu -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m2, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: extractelt_v32i8_idx: +; RV32: # %bb.0: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e8, m2, ta, mu +; RV32-NEXT: vle8.v v8, (a0) +; RV32-NEXT: vsetivli zero, 1, e8, m2, ta, mu +; RV32-NEXT: vslidedown.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v32i8_idx: +; RV64: # %bb.0: +; RV64-NEXT: li a2, 32 +; RV64-NEXT: vsetvli zero, a2, e8, m2, ta, mu +; RV64-NEXT: vle8.v v8, (a0) +; RV64-NEXT: slli a0, a1, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: vsetivli zero, 1, e8, m2, ta, mu +; RV64-NEXT: vslidedown.vx v8, v8, a0 +; RV64-NEXT: vmv.x.s a0, v8 +; RV64-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x %b = extractelement <32 x i8> %a, i32 %idx ret i8 %b } define i16 @extractelt_v16i16_idx(<16 x i16>* %x, i32 signext %idx) nounwind { -; CHECK-LABEL: extractelt_v16i16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: extractelt_v16i16_idx: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, mu +; RV32-NEXT: vslidedown.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v16i16_idx: 
+; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: slli a0, a1, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, mu +; RV64-NEXT: vslidedown.vx v8, v8, a0 +; RV64-NEXT: vmv.x.s a0, v8 +; RV64-NEXT: ret %a = load <16 x i16>, <16 x i16>* %x %b = extractelement <16 x i16> %a, i32 %idx ret i16 %b } define i32 @extractelt_v8i32_idx(<8 x i32>* %x, i32 signext %idx) nounwind { -; CHECK-LABEL: extractelt_v8i32_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: extractelt_v8i32_idx: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu +; RV32-NEXT: vslidedown.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v8i32_idx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: slli a0, a1, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, mu +; RV64-NEXT: vslidedown.vx v8, v8, a0 +; RV64-NEXT: vmv.x.s a0, v8 +; RV64-NEXT: ret %a = load <8 x i32>, <8 x i32>* %x %b = add <8 x i32> %a, %a %c = extractelement <8 x i32> %b, i32 %idx @@ -432,8 +539,10 @@ ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: slli a0, a1, 32 +; RV64-NEXT: srli a0, a0, 32 ; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, mu -; RV64-NEXT: vslidedown.vx v8, v8, a1 +; RV64-NEXT: vslidedown.vx v8, v8, a0 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x @@ -443,15 +552,27 @@ } define half @extractelt_v16f16_idx(<16 
x half>* %x, i32 signext %idx) nounwind { -; CHECK-LABEL: extractelt_v16f16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8 -; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: extractelt_v16f16_idx: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfadd.vv v8, v8, v8 +; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, mu +; RV32-NEXT: vslidedown.vx v8, v8, a1 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v16f16_idx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfadd.vv v8, v8, v8 +; RV64-NEXT: slli a0, a1, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, mu +; RV64-NEXT: vslidedown.vx v8, v8, a0 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %a = load <16 x half>, <16 x half>* %x %b = fadd <16 x half> %a, %a %c = extractelement <16 x half> %b, i32 %idx @@ -459,15 +580,27 @@ } define float @extractelt_v8f32_idx(<8 x float>* %x, i32 signext %idx) nounwind { -; CHECK-LABEL: extractelt_v8f32_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8 -; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: extractelt_v8f32_idx: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfadd.vv v8, v8, v8 +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu +; RV32-NEXT: vslidedown.vx v8, v8, a1 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v8f32_idx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV64-NEXT: vle32.v v8, 
(a0) +; RV64-NEXT: vfadd.vv v8, v8, v8 +; RV64-NEXT: slli a0, a1, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, mu +; RV64-NEXT: vslidedown.vx v8, v8, a0 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %a = load <8 x float>, <8 x float>* %x %b = fadd <8 x float> %a, %a %c = extractelement <8 x float> %b, i32 %idx @@ -475,15 +608,27 @@ } define double @extractelt_v4f64_idx(<4 x double>* %x, i32 signext %idx) nounwind { -; CHECK-LABEL: extractelt_v4f64_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vfadd.vv v8, v8, v8 -; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: extractelt_v4f64_idx: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vfadd.vv v8, v8, v8 +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, mu +; RV32-NEXT: vslidedown.vx v8, v8, a1 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v4f64_idx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfadd.vv v8, v8, v8 +; RV64-NEXT: slli a0, a1, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, mu +; RV64-NEXT: vslidedown.vx v8, v8, a0 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %a = load <4 x double>, <4 x double>* %x %b = fadd <4 x double> %a, %a %c = extractelement <4 x double> %b, i32 %idx @@ -514,8 +659,10 @@ ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: slli a0, a1, 32 +; RV64-NEXT: srli a0, a0, 32 ; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, mu -; RV64-NEXT: vslidedown.vx v8, v8, a1 +; RV64-NEXT: vslidedown.vx v8, v8, a0 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %a = load <3 x i64>, <3 x i64>* %x diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll @@ -17,34 +17,19 @@ } define <1 x i1> @insertelt_idx_v1i1(<1 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind { -; RV32-LABEL: insertelt_idx_v1i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: addi a0, a1, 1 -; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu -; RV32-NEXT: vslideup.vx v9, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; RV32-NEXT: vand.vi v8, v9, 1 -; RV32-NEXT: vmsne.vi v0, v8, 0 -; RV32-NEXT: ret -; -; RV64-LABEL: insertelt_idx_v1i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vmv.v.i v9, 0 -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: sext.w a0, a1 -; RV64-NEXT: addi a1, a0, 1 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, tu, mu -; RV64-NEXT: vslideup.vx v9, v8, a0 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; RV64-NEXT: vand.vi v8, v9, 1 -; RV64-NEXT: vmsne.vi v0, v8, 0 -; RV64-NEXT: ret +; CHECK-LABEL: insertelt_idx_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret %y = insertelement <1 x i1> %x, i1 %elt, i32 %idx ret <1 x i1> %y } @@ -67,34 +52,19 @@ } define <2 x i1> @insertelt_idx_v2i1(<2 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind { -; RV32-LABEL: insertelt_idx_v2i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, mu -; 
RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: addi a0, a1, 1 -; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu -; RV32-NEXT: vslideup.vx v9, v8, a1 -; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, mu -; RV32-NEXT: vand.vi v8, v9, 1 -; RV32-NEXT: vmsne.vi v0, v8, 0 -; RV32-NEXT: ret -; -; RV64-LABEL: insertelt_idx_v2i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, mu -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vmv.v.i v9, 0 -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: sext.w a0, a1 -; RV64-NEXT: addi a1, a0, 1 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, tu, mu -; RV64-NEXT: vslideup.vx v9, v8, a0 -; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, mu -; RV64-NEXT: vand.vi v8, v9, 1 -; RV64-NEXT: vmsne.vi v0, v8, 0 -; RV64-NEXT: ret +; CHECK-LABEL: insertelt_idx_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret %y = insertelement <2 x i1> %x, i1 %elt, i32 %idx ret <2 x i1> %y } @@ -117,34 +87,19 @@ } define <8 x i1> @insertelt_idx_v8i1(<8 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind { -; RV32-LABEL: insertelt_idx_v8i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: addi a0, a1, 1 -; RV32-NEXT: vsetvli zero, a0, e8, mf2, tu, mu -; RV32-NEXT: vslideup.vx v9, v8, a1 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV32-NEXT: vand.vi v8, v9, 1 -; RV32-NEXT: vmsne.vi v0, v8, 0 -; RV32-NEXT: ret -; -; RV64-LABEL: insertelt_idx_v8i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; 
RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vmv.v.i v9, 0 -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: sext.w a0, a1 -; RV64-NEXT: addi a1, a0, 1 -; RV64-NEXT: vsetvli zero, a1, e8, mf2, tu, mu -; RV64-NEXT: vslideup.vx v9, v8, a0 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV64-NEXT: vand.vi v8, v9, 1 -; RV64-NEXT: vmsne.vi v0, v8, 0 -; RV64-NEXT: ret +; CHECK-LABEL: insertelt_idx_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret %y = insertelement <8 x i1> %x, i1 %elt, i32 %idx ret <8 x i1> %y } @@ -168,36 +123,23 @@ } define <64 x i1> @insertelt_idx_v64i1(<64 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind { -; RV32-LABEL: insertelt_idx_v64i1: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 64 -; RV32-NEXT: vsetvli zero, a2, e8, m4, ta, mu -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmerge.vim v12, v12, 1, v0 -; RV32-NEXT: addi a0, a1, 1 -; RV32-NEXT: vsetvli zero, a0, e8, m4, tu, mu -; RV32-NEXT: vslideup.vx v12, v8, a1 -; RV32-NEXT: vsetvli zero, a2, e8, m4, ta, mu -; RV32-NEXT: vand.vi v8, v12, 1 -; RV32-NEXT: vmsne.vi v0, v8, 0 -; RV32-NEXT: ret -; -; RV64-LABEL: insertelt_idx_v64i1: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 64 -; RV64-NEXT: vsetvli zero, a2, e8, m4, ta, mu -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vmv.v.i v12, 0 -; RV64-NEXT: vmerge.vim v12, v12, 1, v0 -; RV64-NEXT: sext.w a0, a1 -; RV64-NEXT: addi a1, a0, 1 -; RV64-NEXT: vsetvli zero, a1, e8, m4, tu, mu -; RV64-NEXT: vslideup.vx v12, v8, a0 -; RV64-NEXT: vsetvli zero, a2, e8, m4, ta, mu -; RV64-NEXT: vand.vi v8, v12, 1 -; RV64-NEXT: vmsne.vi v0, v8, 0 -; RV64-NEXT: ret +; CHECK-LABEL: 
insertelt_idx_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, mu +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu +; CHECK-NEXT: vslideup.vx v12, v8, a1 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, mu +; CHECK-NEXT: vand.vi v8, v12, 1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret %y = insertelement <64 x i1> %x, i1 %elt, i32 %idx ret <64 x i1> %y } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -110,7 +110,8 @@ ; RV64-NEXT: vsetvli zero, a3, e16, m4, ta, mu ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: vmv.s.x v12, a1 -; RV64-NEXT: sext.w a1, a2 +; RV64-NEXT: slli a1, a2, 32 +; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: addi a2, a1, 1 ; RV64-NEXT: vsetvli zero, a2, e16, m4, tu, mu ; RV64-NEXT: vslideup.vx v8, v12, a1 @@ -141,7 +142,8 @@ ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: vfmv.s.f v10, fa0 -; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: addi a2, a1, 1 ; RV64-NEXT: vsetvli zero, a2, e32, m2, tu, mu ; RV64-NEXT: vslideup.vx v8, v10, a1 @@ -190,7 +192,8 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a2, -1 ; RV64-NEXT: vmv.s.x v12, a2 -; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: addi a2, a1, 1 ; RV64-NEXT: vsetvli zero, a2, e64, m4, tu, mu ; RV64-NEXT: vslideup.vx v8, v12, a1 @@ -239,7 +242,8 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a2, 6 ; RV64-NEXT: vmv.s.x v12, a2 -; RV64-NEXT: sext.w a1, a1 +; 
RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: addi a2, a1, 1 ; RV64-NEXT: vsetvli zero, a2, e64, m4, tu, mu ; RV64-NEXT: vslideup.vx v8, v12, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll @@ -29,6 +29,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu ; CHECK-NEXT: vslideup.vx v8, v9, a0 @@ -64,6 +66,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu ; CHECK-NEXT: vslideup.vx v8, v9, a0 @@ -99,6 +103,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu ; CHECK-NEXT: vslideup.vx v8, v9, a0 @@ -134,6 +140,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu ; CHECK-NEXT: vslideup.vx v8, v10, a0 @@ -169,6 +177,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.s.f v12, fa0 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu ; CHECK-NEXT: vslideup.vx v8, v12, a0 @@ -204,6 +214,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.s.f v16, fa0 +; CHECK-NEXT: slli 
a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu ; CHECK-NEXT: vslideup.vx v8, v16, a0 @@ -239,6 +251,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu ; CHECK-NEXT: vslideup.vx v8, v9, a0 @@ -274,6 +288,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu ; CHECK-NEXT: vslideup.vx v8, v9, a0 @@ -309,6 +325,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu ; CHECK-NEXT: vslideup.vx v8, v10, a0 @@ -344,6 +362,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.s.f v12, fa0 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu ; CHECK-NEXT: vslideup.vx v8, v12, a0 @@ -379,6 +399,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.s.f v16, fa0 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu ; CHECK-NEXT: vslideup.vx v8, v16, a0 @@ -414,6 +436,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu ; CHECK-NEXT: vslideup.vx v8, v9, a0 @@ -449,6 +473,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64, 
m2, ta, mu ; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu ; CHECK-NEXT: vslideup.vx v8, v10, a0 @@ -484,6 +510,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.s.f v12, fa0 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu ; CHECK-NEXT: vslideup.vx v8, v12, a0 @@ -519,6 +547,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.s.f v16, fa0 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu ; CHECK-NEXT: vslideup.vx v8, v16, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll @@ -29,9 +29,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, mu ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu -; CHECK-NEXT: vslideup.vx v8, v9, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx ret %r @@ -64,9 +66,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, mu ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu -; CHECK-NEXT: vslideup.vx v8, v9, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx ret %r @@ -99,9 
+103,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, mu ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu -; CHECK-NEXT: vslideup.vx v8, v9, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx ret %r @@ -134,9 +140,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, mu ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu -; CHECK-NEXT: vslideup.vx v8, v9, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx ret %r @@ -169,9 +177,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, mu ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu -; CHECK-NEXT: vslideup.vx v8, v10, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v10, a0 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx ret %r @@ -204,9 +214,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, mu ; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu -; CHECK-NEXT: vslideup.vx v8, v12, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v12, a0 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx ret %r @@ -239,9 +251,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, mu ; CHECK-NEXT: 
vmv.s.x v16, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu -; CHECK-NEXT: vslideup.vx v8, v16, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu +; CHECK-NEXT: vslideup.vx v8, v16, a0 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx ret %r @@ -274,9 +288,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vslideup.vx v8, v9, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx ret %r @@ -309,9 +325,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vslideup.vx v8, v9, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx ret %r @@ -344,9 +362,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vslideup.vx v8, v9, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx ret %r @@ -379,9 +399,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, mu ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, 
a0, e16, m2, tu, mu -; CHECK-NEXT: vslideup.vx v8, v10, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v10, a0 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx ret %r @@ -414,9 +436,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, mu ; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vslideup.vx v8, v12, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v12, a0 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx ret %r @@ -449,9 +473,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, mu ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vslideup.vx v8, v16, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vslideup.vx v8, v16, a0 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx ret %r @@ -484,9 +510,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vslideup.vx v8, v9, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx ret %r @@ -519,9 +547,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vslideup.vx v8, v9, a1 +; CHECK-NEXT: 
slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx ret %r @@ -554,9 +584,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, mu ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vslideup.vx v8, v10, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v10, a0 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx ret %r @@ -589,9 +621,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, mu ; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vslideup.vx v8, v12, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v12, a0 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx ret %r @@ -624,9 +658,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vslideup.vx v8, v16, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vslideup.vx v8, v16, a0 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx ret %r @@ -659,7 +695,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, mu ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: sext.w a0, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu ; CHECK-NEXT: vslideup.vx v8, v9, a0 @@ 
-695,7 +732,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e64, m2, ta, mu ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: sext.w a0, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu ; CHECK-NEXT: vslideup.vx v8, v10, a0 @@ -731,7 +769,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, mu ; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: sext.w a0, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu ; CHECK-NEXT: vslideup.vx v8, v12, a0 @@ -767,7 +806,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, mu ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: sext.w a0, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu ; CHECK-NEXT: vslideup.vx v8, v16, a0 diff --git a/llvm/test/CodeGen/VE/Vector/extract_elt.ll b/llvm/test/CodeGen/VE/Vector/extract_elt.ll --- a/llvm/test/CodeGen/VE/Vector/extract_elt.ll +++ b/llvm/test/CodeGen/VE/Vector/extract_elt.ll @@ -6,6 +6,7 @@ define fastcc i64 @extract_rr_v256i64(i32 signext %idx, <256 x i64> %v) { ; CHECK-LABEL: extract_rr_v256i64: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lvs %s0, %v0(%s0) ; CHECK-NEXT: b.l.t (, %s10) %ret = extractelement <256 x i64> %v, i32 %idx @@ -45,6 +46,7 @@ define fastcc i32 @extract_rr_v256i32(i32 signext %idx, <256 x i32> %v) { ; CHECK-LABEL: extract_rr_v256i32: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lvs %s0, %v0(%s0) ; CHECK-NEXT: b.l.t (, %s10) %ret = extractelement <256 x i32> %v, i32 %idx @@ -84,7 +86,10 @@ define fastcc i32 @extract_rr_v512i32(<512 x i32> %v, i32 signext %idx) { ; CHECK-LABEL: extract_rr_v512i32: ; CHECK: # %bb.0: -; CHECK-NEXT: srl %s1, %s0, 1 +; CHECK-NEXT: lea %s1, -2 +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: and %s1, 
%s0, %s1 +; CHECK-NEXT: srl %s1, %s1, 1 ; CHECK-NEXT: lvs %s1, %v0(%s1) ; CHECK-NEXT: nnd %s0, %s0, (63)0 ; CHECK-NEXT: sla.w.sx %s0, %s0, 5 @@ -100,6 +105,7 @@ define fastcc double @extract_rr_v256f64(i32 signext %idx, <256 x double> %v) { ; CHECK-LABEL: extract_rr_v256f64: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lvs %s0, %v0(%s0) ; CHECK-NEXT: b.l.t (, %s10) %ret = extractelement <256 x double> %v, i32 %idx @@ -139,6 +145,7 @@ define fastcc float @extract_rr_v256f32(i32 signext %idx, <256 x float> %v) { ; CHECK-LABEL: extract_rr_v256f32: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lvs %s0, %v0(%s0) ; CHECK-NEXT: b.l.t (, %s10) %ret = extractelement <256 x float> %v, i32 %idx @@ -179,7 +186,10 @@ define fastcc float @extract_rr_v512f32(<512 x float> %v, i32 signext %idx) { ; CHECK-LABEL: extract_rr_v512f32: ; CHECK: # %bb.0: -; CHECK-NEXT: srl %s1, %s0, 1 +; CHECK-NEXT: lea %s1, -2 +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: and %s1, %s0, %s1 +; CHECK-NEXT: srl %s1, %s1, 1 ; CHECK-NEXT: lvs %s1, %v0(%s1) ; CHECK-NEXT: nnd %s0, %s0, (63)0 ; CHECK-NEXT: sla.w.sx %s0, %s0, 5 diff --git a/llvm/test/CodeGen/VE/Vector/insert_elt.ll b/llvm/test/CodeGen/VE/Vector/insert_elt.ll --- a/llvm/test/CodeGen/VE/Vector/insert_elt.ll +++ b/llvm/test/CodeGen/VE/Vector/insert_elt.ll @@ -6,6 +6,7 @@ define fastcc <256 x i64> @insert_rr_v256i64(i32 signext %idx, i64 %s) { ; CHECK-LABEL: insert_rr_v256i64: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lsv %v0(%s0), %s1 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x i64> undef, i64 %s, i32 %idx @@ -46,6 +47,7 @@ ; CHECK-LABEL: insert_rr_v256i32: ; CHECK: # %bb.0: ; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lsv %v0(%s0), %s1 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x i32> undef, i32 %s, i32 %idx @@ -94,6 +96,9 @@ ; CHECK-NEXT: nnd %s2, %s0, (63)0 ; CHECK-NEXT: sla.w.sx %s2, %s2, 5 ; CHECK-NEXT: sll 
%s1, %s1, %s2 +; CHECK-NEXT: lea %s3, -2 +; CHECK-NEXT: and %s3, %s3, (32)0 +; CHECK-NEXT: and %s0, %s0, %s3 ; CHECK-NEXT: srl %s0, %s0, 1 ; CHECK-NEXT: lvs %s3, %v0(%s0) ; CHECK-NEXT: srl %s2, (32)1, %s2 @@ -110,6 +115,7 @@ define fastcc <256 x double> @insert_rr_v256f64(i32 signext %idx, double %s) { ; CHECK-LABEL: insert_rr_v256f64: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lsv %v0(%s0), %s1 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x double> undef, double %s, i32 %idx @@ -149,6 +155,7 @@ define fastcc <256 x float> @insert_rr_v256f32(i32 signext %idx, float %s) { ; CHECK-LABEL: insert_rr_v256f32: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lsv %v0(%s0), %s1 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x float> undef, float %s, i32 %idx @@ -193,7 +200,10 @@ ; CHECK-LABEL: insert_rr_v512f32: ; CHECK: # %bb.0: ; CHECK-NEXT: sra.l %s1, %s1, 32 -; CHECK-NEXT: srl %s2, %s0, 1 +; CHECK-NEXT: lea %s2, -2 +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: and %s2, %s0, %s2 +; CHECK-NEXT: srl %s2, %s2, 1 ; CHECK-NEXT: lvs %s3, %v0(%s2) ; CHECK-NEXT: nnd %s0, %s0, (63)0 ; CHECK-NEXT: sla.w.sx %s0, %s0, 5 diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll --- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll @@ -97,8 +97,17 @@ ; CHECK-LABEL: swizzle_one_i8x16: ; CHECK: .functype swizzle_one_i8x16 (v128, v128) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1 -; CHECK-NEXT: return $pop0 +; CHECK-NEXT: global.get $push5=, __stack_pointer +; CHECK-NEXT: i32.const $push6=, 16 +; CHECK-NEXT: i32.sub $push8=, $pop5, $pop6 +; CHECK-NEXT: local.tee $push7=, $2=, $pop8 +; CHECK-NEXT: v128.store 0($pop7), $0 +; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 0 +; CHECK-NEXT: i32.const $push1=, 15 +; CHECK-NEXT: i32.and $push2=, $pop0, $pop1 +; CHECK-NEXT: i32.or 
$push3=, $2, $pop2 +; CHECK-NEXT: v128.load8_splat $push4=, 0($pop3) +; CHECK-NEXT: return $pop4 %m0 = extractelement <16 x i8> %mask, i32 0 %s0 = extractelement <16 x i8> %src, i8 %m0 %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0 @@ -109,8 +118,107 @@ ; CHECK-LABEL: swizzle_all_i8x16: ; CHECK: .functype swizzle_all_i8x16 (v128, v128) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1 -; CHECK-NEXT: return $pop0 +; CHECK-NEXT: global.get $push80=, __stack_pointer +; CHECK-NEXT: i32.const $push81=, 16 +; CHECK-NEXT: i32.sub $push98=, $pop80, $pop81 +; CHECK-NEXT: local.tee $push97=, $2=, $pop98 +; CHECK-NEXT: v128.store 0($pop97), $0 +; CHECK-NEXT: i8x16.extract_lane_u $push61=, $1, 0 +; CHECK-NEXT: i32.const $push1=, 15 +; CHECK-NEXT: i32.and $push62=, $pop61, $pop1 +; CHECK-NEXT: i32.or $push63=, $2, $pop62 +; CHECK-NEXT: v128.load8_splat $push64=, 0($pop63) +; CHECK-NEXT: i8x16.extract_lane_u $push57=, $1, 1 +; CHECK-NEXT: i32.const $push96=, 15 +; CHECK-NEXT: i32.and $push58=, $pop57, $pop96 +; CHECK-NEXT: i32.or $push59=, $2, $pop58 +; CHECK-NEXT: i32.load8_u $push60=, 0($pop59) +; CHECK-NEXT: i8x16.replace_lane $push65=, $pop64, 1, $pop60 +; CHECK-NEXT: i8x16.extract_lane_u $push53=, $1, 2 +; CHECK-NEXT: i32.const $push95=, 15 +; CHECK-NEXT: i32.and $push54=, $pop53, $pop95 +; CHECK-NEXT: i32.or $push55=, $2, $pop54 +; CHECK-NEXT: i32.load8_u $push56=, 0($pop55) +; CHECK-NEXT: i8x16.replace_lane $push66=, $pop65, 2, $pop56 +; CHECK-NEXT: i8x16.extract_lane_u $push49=, $1, 3 +; CHECK-NEXT: i32.const $push94=, 15 +; CHECK-NEXT: i32.and $push50=, $pop49, $pop94 +; CHECK-NEXT: i32.or $push51=, $2, $pop50 +; CHECK-NEXT: i32.load8_u $push52=, 0($pop51) +; CHECK-NEXT: i8x16.replace_lane $push67=, $pop66, 3, $pop52 +; CHECK-NEXT: i8x16.extract_lane_u $push45=, $1, 4 +; CHECK-NEXT: i32.const $push93=, 15 +; CHECK-NEXT: i32.and $push46=, $pop45, $pop93 +; CHECK-NEXT: i32.or $push47=, $2, $pop46 +; CHECK-NEXT: i32.load8_u $push48=, 
0($pop47) +; CHECK-NEXT: i8x16.replace_lane $push68=, $pop67, 4, $pop48 +; CHECK-NEXT: i8x16.extract_lane_u $push41=, $1, 5 +; CHECK-NEXT: i32.const $push92=, 15 +; CHECK-NEXT: i32.and $push42=, $pop41, $pop92 +; CHECK-NEXT: i32.or $push43=, $2, $pop42 +; CHECK-NEXT: i32.load8_u $push44=, 0($pop43) +; CHECK-NEXT: i8x16.replace_lane $push69=, $pop68, 5, $pop44 +; CHECK-NEXT: i8x16.extract_lane_u $push37=, $1, 6 +; CHECK-NEXT: i32.const $push91=, 15 +; CHECK-NEXT: i32.and $push38=, $pop37, $pop91 +; CHECK-NEXT: i32.or $push39=, $2, $pop38 +; CHECK-NEXT: i32.load8_u $push40=, 0($pop39) +; CHECK-NEXT: i8x16.replace_lane $push70=, $pop69, 6, $pop40 +; CHECK-NEXT: i8x16.extract_lane_u $push33=, $1, 7 +; CHECK-NEXT: i32.const $push90=, 15 +; CHECK-NEXT: i32.and $push34=, $pop33, $pop90 +; CHECK-NEXT: i32.or $push35=, $2, $pop34 +; CHECK-NEXT: i32.load8_u $push36=, 0($pop35) +; CHECK-NEXT: i8x16.replace_lane $push71=, $pop70, 7, $pop36 +; CHECK-NEXT: i8x16.extract_lane_u $push29=, $1, 8 +; CHECK-NEXT: i32.const $push89=, 15 +; CHECK-NEXT: i32.and $push30=, $pop29, $pop89 +; CHECK-NEXT: i32.or $push31=, $2, $pop30 +; CHECK-NEXT: i32.load8_u $push32=, 0($pop31) +; CHECK-NEXT: i8x16.replace_lane $push72=, $pop71, 8, $pop32 +; CHECK-NEXT: i8x16.extract_lane_u $push25=, $1, 9 +; CHECK-NEXT: i32.const $push88=, 15 +; CHECK-NEXT: i32.and $push26=, $pop25, $pop88 +; CHECK-NEXT: i32.or $push27=, $2, $pop26 +; CHECK-NEXT: i32.load8_u $push28=, 0($pop27) +; CHECK-NEXT: i8x16.replace_lane $push73=, $pop72, 9, $pop28 +; CHECK-NEXT: i8x16.extract_lane_u $push21=, $1, 10 +; CHECK-NEXT: i32.const $push87=, 15 +; CHECK-NEXT: i32.and $push22=, $pop21, $pop87 +; CHECK-NEXT: i32.or $push23=, $2, $pop22 +; CHECK-NEXT: i32.load8_u $push24=, 0($pop23) +; CHECK-NEXT: i8x16.replace_lane $push74=, $pop73, 10, $pop24 +; CHECK-NEXT: i8x16.extract_lane_u $push17=, $1, 11 +; CHECK-NEXT: i32.const $push86=, 15 +; CHECK-NEXT: i32.and $push18=, $pop17, $pop86 +; CHECK-NEXT: i32.or $push19=, $2, $pop18 +; 
CHECK-NEXT: i32.load8_u $push20=, 0($pop19) +; CHECK-NEXT: i8x16.replace_lane $push75=, $pop74, 11, $pop20 +; CHECK-NEXT: i8x16.extract_lane_u $push13=, $1, 12 +; CHECK-NEXT: i32.const $push85=, 15 +; CHECK-NEXT: i32.and $push14=, $pop13, $pop85 +; CHECK-NEXT: i32.or $push15=, $2, $pop14 +; CHECK-NEXT: i32.load8_u $push16=, 0($pop15) +; CHECK-NEXT: i8x16.replace_lane $push76=, $pop75, 12, $pop16 +; CHECK-NEXT: i8x16.extract_lane_u $push9=, $1, 13 +; CHECK-NEXT: i32.const $push84=, 15 +; CHECK-NEXT: i32.and $push10=, $pop9, $pop84 +; CHECK-NEXT: i32.or $push11=, $2, $pop10 +; CHECK-NEXT: i32.load8_u $push12=, 0($pop11) +; CHECK-NEXT: i8x16.replace_lane $push77=, $pop76, 13, $pop12 +; CHECK-NEXT: i8x16.extract_lane_u $push5=, $1, 14 +; CHECK-NEXT: i32.const $push83=, 15 +; CHECK-NEXT: i32.and $push6=, $pop5, $pop83 +; CHECK-NEXT: i32.or $push7=, $2, $pop6 +; CHECK-NEXT: i32.load8_u $push8=, 0($pop7) +; CHECK-NEXT: i8x16.replace_lane $push78=, $pop77, 14, $pop8 +; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 15 +; CHECK-NEXT: i32.const $push82=, 15 +; CHECK-NEXT: i32.and $push2=, $pop0, $pop82 +; CHECK-NEXT: i32.or $push3=, $2, $pop2 +; CHECK-NEXT: i32.load8_u $push4=, 0($pop3) +; CHECK-NEXT: i8x16.replace_lane $push79=, $pop78, 15, $pop4 +; CHECK-NEXT: return $pop79 %m0 = extractelement <16 x i8> %mask, i32 0 %s0 = extractelement <16 x i8> %src, i8 %m0 %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0 @@ -210,14 +318,25 @@ ; CHECK-LABEL: mashup_swizzle_i8x16: ; CHECK: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1 -; CHECK-NEXT: i8x16.replace_lane $push1=, $pop0, 3, $2 -; CHECK-NEXT: i32.const $push2=, 42 -; CHECK-NEXT: i8x16.replace_lane $push3=, $pop1, 4, $pop2 -; CHECK-NEXT: i8x16.replace_lane $push4=, $pop3, 12, $2 -; CHECK-NEXT: i32.const $push6=, 42 -; CHECK-NEXT: i8x16.replace_lane $push5=, $pop4, 14, $pop6 -; CHECK-NEXT: return $pop5 +; CHECK-NEXT: global.get $push12=, 
__stack_pointer +; CHECK-NEXT: i32.const $push13=, 16 +; CHECK-NEXT: i32.sub $push16=, $pop12, $pop13 +; CHECK-NEXT: local.tee $push15=, $3=, $pop16 +; CHECK-NEXT: v128.store 0($pop15), $0 +; CHECK-NEXT: i8x16.extract_lane_u $push7=, $1, 7 +; CHECK-NEXT: i32.const $push1=, 15 +; CHECK-NEXT: i32.and $push8=, $pop7, $pop1 +; CHECK-NEXT: i32.or $push9=, $3, $pop8 +; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 0 +; CHECK-NEXT: i32.const $push14=, 15 +; CHECK-NEXT: i32.and $push2=, $pop0, $pop14 +; CHECK-NEXT: i32.or $push3=, $3, $pop2 +; CHECK-NEXT: v128.const $push4=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0 +; CHECK-NEXT: v128.load8_lane $push5=, 0($pop3), $pop4, 0 +; CHECK-NEXT: i8x16.replace_lane $push6=, $pop5, 3, $2 +; CHECK-NEXT: v128.load8_lane $push10=, 0($pop9), $pop6, 7 +; CHECK-NEXT: i8x16.replace_lane $push11=, $pop10, 12, $2 +; CHECK-NEXT: return $pop11 %m0 = extractelement <16 x i8> %mask, i32 0 %s0 = extractelement <16 x i8> %src, i8 %m0 %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0 diff --git a/llvm/test/CodeGen/X86/extract-insert.ll b/llvm/test/CodeGen/X86/extract-insert.ll --- a/llvm/test/CodeGen/X86/extract-insert.ll +++ b/llvm/test/CodeGen/X86/extract-insert.ll @@ -5,6 +5,7 @@ define i32 @extractelt_undef_insertelt(i32 %x, i32 %y) { ; CHECK-LABEL: extractelt_undef_insertelt: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ret{{[l|q]}} %b = insertelement <4 x i32> zeroinitializer, i32 %x, i64 3 %c = icmp uge i32 %y, %y diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll --- a/llvm/test/CodeGen/X86/insertelement-var-index.ll +++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll @@ -996,7 +996,7 @@ ; ; AVX512-LABEL: arg_i64_v2i64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %xmm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 ; AVX512-NEXT: vpbroadcastq %rdi, %xmm0 
{%k1} @@ -1101,7 +1101,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: movapd %xmm0, %xmm2 ; SSE41-NEXT: movddup {{.*#+}} xmm1 = xmm1[0,0] -; SSE41-NEXT: movslq %edi, %rax +; SSE41-NEXT: movl %edi, %eax ; SSE41-NEXT: movq %rax, %xmm0 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -1112,7 +1112,7 @@ ; AVX1-LABEL: arg_f64_v2f64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] -; AVX1-NEXT: movslq %edi, %rax +; AVX1-NEXT: movl %edi, %eax ; AVX1-NEXT: vmovq %rax, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 @@ -1122,7 +1122,7 @@ ; AVX2-LABEL: arg_f64_v2f64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] -; AVX2-NEXT: movslq %edi, %rax +; AVX2-NEXT: movl %edi, %eax ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 @@ -1131,7 +1131,7 @@ ; ; AVX512-LABEL: arg_f64_v2f64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %edi, %rax +; AVX512-NEXT: movl %edi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %xmm2 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1 ; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] @@ -1346,7 +1346,7 @@ ; ; AVX512-LABEL: load_i64_v2i64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %xmm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 ; AVX512-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1} @@ -1458,7 +1458,7 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: movapd %xmm0, %xmm1 ; SSE41-NEXT: movddup {{.*#+}} xmm2 = mem[0,0] -; SSE41-NEXT: movslq %esi, %rax +; SSE41-NEXT: movl %esi, %eax ; SSE41-NEXT: movq %rax, %xmm0 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -1469,7 +1469,7 @@ ; AVX1-LABEL: load_f64_v2f64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovddup 
{{.*#+}} xmm1 = mem[0,0] -; AVX1-NEXT: movslq %esi, %rax +; AVX1-NEXT: movl %esi, %eax ; AVX1-NEXT: vmovq %rax, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 @@ -1479,7 +1479,7 @@ ; AVX2-LABEL: load_f64_v2f64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; AVX2-NEXT: movslq %esi, %rax +; AVX2-NEXT: movl %esi, %eax ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 @@ -1488,7 +1488,7 @@ ; ; AVX512-LABEL: load_f64_v2f64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %xmm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 ; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0] @@ -1733,7 +1733,7 @@ ; ; AVX512-LABEL: arg_i64_v4i64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %ymm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 ; AVX512-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} @@ -1834,7 +1834,7 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 -; AVX1-NEXT: movslq %edi, %rax +; AVX1-NEXT: movl %edi, %eax ; AVX1-NEXT: vmovq %rax, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 @@ -1846,7 +1846,7 @@ ; AVX2-LABEL: arg_f64_v4f64: ; AVX2: # %bb.0: ; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1 -; AVX2-NEXT: movslq %edi, %rax +; AVX2-NEXT: movl %edi, %eax ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 @@ -1855,7 +1855,7 @@ ; ; AVX512-LABEL: arg_f64_v4f64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %edi, %rax +; AVX512-NEXT: movl %edi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %ymm2 ; AVX512-NEXT: 
vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %k1 ; AVX512-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1} @@ -2114,7 +2114,7 @@ ; ; AVX512-LABEL: load_i64_v4i64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %ymm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 ; AVX512-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1} @@ -2218,7 +2218,7 @@ ; ; AVX1-LABEL: load_f64_v4f64: ; AVX1: # %bb.0: -; AVX1-NEXT: movslq %esi, %rax +; AVX1-NEXT: movl %esi, %eax ; AVX1-NEXT: vmovq %rax, %xmm1 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 @@ -2231,7 +2231,7 @@ ; AVX2-LABEL: load_f64_v4f64: ; AVX2: # %bb.0: ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1 -; AVX2-NEXT: movslq %esi, %rax +; AVX2-NEXT: movl %esi, %eax ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 @@ -2240,7 +2240,7 @@ ; ; AVX512-LABEL: load_f64_v4f64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %ymm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 ; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} @@ -2273,6 +2273,15 @@ ; SSE-LABEL: PR44139: ; SSE: # %bb.0: ; SSE-NEXT: movl (%rdi), %eax +; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1] +; SSE-NEXT: movdqa %xmm0, 96(%rdi) +; SSE-NEXT: movdqa %xmm0, 112(%rdi) +; SSE-NEXT: movdqa %xmm0, 64(%rdi) +; SSE-NEXT: movdqa %xmm0, 80(%rdi) +; SSE-NEXT: movdqa %xmm0, 32(%rdi) +; SSE-NEXT: movdqa %xmm0, 48(%rdi) +; SSE-NEXT: movdqa %xmm0, (%rdi) +; SSE-NEXT: movdqa %xmm0, 16(%rdi) ; SSE-NEXT: leal 2147483647(%rax), %ecx ; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: cmovnsl %eax, %ecx @@ -2283,23 +2292,51 @@ ; SSE-NEXT: divl %ecx ; SSE-NEXT: retq ; -; AVX-LABEL: PR44139: -; AVX: # %bb.0: -; AVX-NEXT: movl (%rdi), %eax -; AVX-NEXT: leal 2147483647(%rax), %ecx -; AVX-NEXT: 
testl %eax, %eax -; AVX-NEXT: cmovnsl %eax, %ecx -; AVX-NEXT: andl $-2147483648, %ecx # imm = 0x80000000 -; AVX-NEXT: addl %eax, %ecx -; AVX-NEXT: # kill: def $eax killed $eax killed $rax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: divl %ecx -; AVX-NEXT: retq +; AVX1OR2-LABEL: PR44139: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX1OR2-NEXT: movl (%rdi), %eax +; AVX1OR2-NEXT: vmovaps %ymm0, 64(%rdi) +; AVX1OR2-NEXT: vmovaps %ymm0, 96(%rdi) +; AVX1OR2-NEXT: vmovaps %ymm0, (%rdi) +; AVX1OR2-NEXT: vmovaps %ymm0, 32(%rdi) +; AVX1OR2-NEXT: leal 2147483647(%rax), %ecx +; AVX1OR2-NEXT: testl %eax, %eax +; AVX1OR2-NEXT: cmovnsl %eax, %ecx +; AVX1OR2-NEXT: andl $-2147483648, %ecx # imm = 0x80000000 +; AVX1OR2-NEXT: addl %eax, %ecx +; AVX1OR2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX1OR2-NEXT: xorl %edx, %edx +; AVX1OR2-NEXT: divl %ecx +; AVX1OR2-NEXT: vzeroupper +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: PR44139: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastsd (%rdi), %zmm0 +; AVX512-NEXT: movl (%rdi), %eax +; AVX512-NEXT: vmovaps %zmm0, (%rdi) +; AVX512-NEXT: vmovaps %zmm0, 64(%rdi) +; AVX512-NEXT: leal 2147483647(%rax), %ecx +; AVX512-NEXT: testl %eax, %eax +; AVX512-NEXT: cmovnsl %eax, %ecx +; AVX512-NEXT: andl $-2147483648, %ecx # imm = 0x80000000 +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: xorl %edx, %edx +; AVX512-NEXT: divl %ecx +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq ; ; X86AVX2-LABEL: PR44139: ; X86AVX2: # %bb.0: -; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86AVX2-NEXT: movl (%eax), %eax +; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86AVX2-NEXT: movl (%ecx), %eax +; X86AVX2-NEXT: vbroadcastsd (%ecx), %ymm0 +; X86AVX2-NEXT: vmovaps %ymm0, 64(%ecx) +; X86AVX2-NEXT: vmovaps %ymm0, 96(%ecx) +; X86AVX2-NEXT: vmovaps %ymm0, (%ecx) +; X86AVX2-NEXT: vmovaps %ymm0, 32(%ecx) ; X86AVX2-NEXT: leal 2147483647(%eax), %ecx ; X86AVX2-NEXT: testl %eax, %eax ; X86AVX2-NEXT: 
cmovnsl %eax, %ecx @@ -2307,6 +2344,7 @@ ; X86AVX2-NEXT: addl %eax, %ecx ; X86AVX2-NEXT: xorl %edx, %edx ; X86AVX2-NEXT: divl %ecx +; X86AVX2-NEXT: vzeroupper ; X86AVX2-NEXT: retl %L = load <16 x i64>, ptr %p %E1 = extractelement <16 x i64> %L, i64 0 diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll --- a/llvm/test/CodeGen/X86/var-permute-128.ll +++ b/llvm/test/CodeGen/X86/var-permute-128.ll @@ -129,7 +129,7 @@ define <8 x i16> @var_shuffle_v8i16(<8 x i16> %v, <8 x i16> %indices) nounwind { ; SSE3-LABEL: var_shuffle_v8i16: ; SSE3: # %bb.0: -; SSE3-NEXT: movd %xmm1, %r8d +; SSE3-NEXT: pextrw $0, %xmm1, %r8d ; SSE3-NEXT: pextrw $1, %xmm1, %r9d ; SSE3-NEXT: pextrw $2, %xmm1, %r10d ; SSE3-NEXT: pextrw $3, %xmm1, %esi diff --git a/llvm/test/CodeGen/X86/var-permute-512.ll b/llvm/test/CodeGen/X86/var-permute-512.ll --- a/llvm/test/CodeGen/X86/var-permute-512.ll +++ b/llvm/test/CodeGen/X86/var-permute-512.ll @@ -101,7 +101,7 @@ ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3 ; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; AVX512F-NEXT: vmovd %xmm4, %eax +; AVX512F-NEXT: vpextrw $0, %xmm4, %eax ; AVX512F-NEXT: vmovaps %zmm0, (%rsp) ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax @@ -127,7 +127,7 @@ ; AVX512F-NEXT: vpextrw $7, %xmm4, %eax ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0 -; AVX512F-NEXT: vmovd %xmm3, %eax +; AVX512F-NEXT: vpextrw $0, %xmm3, %eax ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -152,7 +152,7 @@ ; AVX512F-NEXT: vpextrw $7, %xmm3, %eax ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm4, %xmm3 -; AVX512F-NEXT: vmovd %xmm2, %eax +; AVX512F-NEXT: vpextrw $0, %xmm2, %eax ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -180,7 +180,7 @@ ; 
AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax ; AVX512F-NEXT: vpinsrw $7, %eax, %xmm4, %xmm2 -; AVX512F-NEXT: vmovd %xmm1, %eax +; AVX512F-NEXT: vpextrw $0, %xmm1, %eax ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -330,7 +330,7 @@ ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3 ; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; AVX512F-NEXT: vmovd %xmm4, %eax +; AVX512F-NEXT: vpextrb $0, %xmm4, %eax ; AVX512F-NEXT: vmovaps %zmm0, (%rsp) ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax @@ -380,7 +380,7 @@ ; AVX512F-NEXT: vpextrb $15, %xmm4, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0 -; AVX512F-NEXT: vmovd %xmm3, %eax +; AVX512F-NEXT: vpextrb $0, %xmm3, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -432,7 +432,7 @@ ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax ; AVX512F-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3 -; AVX512F-NEXT: vmovd %xmm2, %eax +; AVX512F-NEXT: vpextrb $0, %xmm2, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -485,7 +485,7 @@ ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax ; AVX512F-NEXT: vpinsrb $15, %eax, %xmm4, %xmm2 -; AVX512F-NEXT: vmovd %xmm1, %eax +; AVX512F-NEXT: vpextrb $0, %xmm1, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -555,7 +555,7 @@ ; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; AVX512BW-NEXT: vmovd %xmm4, %eax +; AVX512BW-NEXT: vpextrb $0, %xmm4, %eax ; AVX512BW-NEXT: vmovaps %zmm0, (%rsp) ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax @@ -605,7 +605,7 @@ ; 
AVX512BW-NEXT: vpextrb $15, %xmm4, %eax ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0 -; AVX512BW-NEXT: vmovd %xmm3, %eax +; AVX512BW-NEXT: vpextrb $0, %xmm3, %eax ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vmovd %eax, %xmm4 @@ -657,7 +657,7 @@ ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3 -; AVX512BW-NEXT: vmovd %xmm2, %eax +; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vmovd %eax, %xmm4 @@ -710,7 +710,7 @@ ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm2 -; AVX512BW-NEXT: vmovd %xmm1, %eax +; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vmovd %eax, %xmm4 diff --git a/llvm/test/CodeGen/X86/vec_extract.ll b/llvm/test/CodeGen/X86/vec_extract.ll --- a/llvm/test/CodeGen/X86/vec_extract.ll +++ b/llvm/test/CodeGen/X86/vec_extract.ll @@ -110,11 +110,15 @@ ; X32-LABEL: ossfuzz15662: ; X32: # %bb.0: ; X32-NEXT: xorps %xmm0, %xmm0 +; X32-NEXT: movaps %xmm0, (%eax) +; X32-NEXT: xorps %xmm0, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: ossfuzz15662: ; X64: # %bb.0: ; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, (%rax) +; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: retq %C10 = icmp ule i1 false, false %C3 = icmp ule i1 true, undef