diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5320,23 +5320,61 @@
     break;
   }
   case Intrinsic::experimental_vector_insert: {
-    VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
-    VectorType *SubVecTy = cast<VectorType>(Call.getArgOperand(1)->getType());
+    Value *Vec = Call.getArgOperand(0);
+    Value *SubVec = Call.getArgOperand(1);
+    Value *Idx = Call.getArgOperand(2);
+    unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
+    VectorType *VecTy = cast<VectorType>(Vec->getType());
+    VectorType *SubVecTy = cast<VectorType>(SubVec->getType());
+
+    ElementCount VecEC = VecTy->getElementCount();
+    ElementCount SubVecEC = SubVecTy->getElementCount();
     Assert(VecTy->getElementType() == SubVecTy->getElementType(),
            "experimental_vector_insert parameters must have the same element "
            "type.",
           &Call);
+    Assert(IdxN % SubVecEC.getKnownMinValue() == 0,
+           "experimental_vector_insert index must be a constant multiple of "
+           "the subvector's known minimum vector length.");
+
+    // If this insertion is not the 'mixed' case where a fixed vector is
+    // inserted into a scalable vector, ensure that the insertion of the
+    // subvector does not overrun the parent vector.
+    if (VecEC.isScalable() == SubVecEC.isScalable()) {
+      Assert(
+          IdxN + SubVecEC.getKnownMinValue() <= VecEC.getKnownMinValue(),
+          "subvector operand of experimental_vector_insert would overrun the "
+          "vector being inserted into.");
+    }
     break;
   }
   case Intrinsic::experimental_vector_extract: {
+    Value *Vec = Call.getArgOperand(0);
+    Value *Idx = Call.getArgOperand(1);
+    unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
+
     VectorType *ResultTy = cast<VectorType>(Call.getType());
-    VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
+    VectorType *VecTy = cast<VectorType>(Vec->getType());
+
+    ElementCount VecEC = VecTy->getElementCount();
+    ElementCount ResultEC = ResultTy->getElementCount();
 
     Assert(ResultTy->getElementType() == VecTy->getElementType(),
            "experimental_vector_extract result must have the same element "
            "type as the input vector.",
           &Call);
+    Assert(IdxN % ResultEC.getKnownMinValue() == 0,
+           "experimental_vector_extract index must be a constant multiple of "
+           "the result type's known minimum vector length.");
+
+    // If this extraction is not the 'mixed' case where a fixed vector is
+    // extracted from a scalable vector, ensure that the extraction does not
+    // overrun the parent vector.
+    if (VecEC.isScalable() == ResultEC.isScalable()) {
+      Assert(IdxN + ResultEC.getKnownMinValue() <= VecEC.getKnownMinValue(),
+             "experimental_vector_extract would overrun.");
+    }
     break;
   }
   case Intrinsic::experimental_noalias_scope_decl: {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1873,13 +1873,6 @@
     unsigned SubVecNumElts = SubVecTy->getNumElements();
     unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
 
-    // The result of this call is undefined if IdxN is not a constant multiple
-    // of the SubVec's minimum vector length OR the insertion overruns Vec.
-    if (IdxN % SubVecNumElts != 0 || IdxN + SubVecNumElts > VecNumElts) {
-      replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
-      return eraseInstFromFunction(CI);
-    }
-
     // An insert that entirely overwrites Vec with SubVec is a nop.
     if (VecNumElts == SubVecNumElts) {
       replaceInstUsesWith(CI, SubVec);
@@ -1927,14 +1920,6 @@
     unsigned VecNumElts = VecTy->getNumElements();
     unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
 
-    // The result of this call is undefined if IdxN is not a constant multiple
-    // of the result type's minimum vector length OR the extraction overruns
-    // Vec.
-    if (IdxN % DstNumElts != 0 || IdxN + DstNumElts > VecNumElts) {
-      replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
-      return eraseInstFromFunction(CI);
-    }
-
     // Extracting the entirety of Vec is a nop.
     if (VecNumElts == DstNumElts) {
       replaceInstUsesWith(CI, Vec);
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
--- a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
@@ -12,18 +12,25 @@
 }
 
 ; Goes through memory currently; idx != 0.
-define <2 x i64> @extract_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec) nounwind {
-; CHECK-LABEL: extract_v2i64_nxv2i64_idx1:
+define <2 x i64> @extract_v2i64_nxv2i64_idx2(<vscale x 2 x i64> %vec) nounwind {
+; CHECK-LABEL: extract_v2i64_nxv2i64_idx2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntd x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #2
+; CHECK-NEXT:    cmp x9, #2 // =2
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #8]
+; CHECK-NEXT:    lsl x8, x8, #3
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 1)
+  %retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 2)
   ret <2 x i64> %retval
 }
 
@@ -38,18 +45,25 @@
 }
 
 ; Goes through memory currently; idx != 0.
-define <4 x i32> @extract_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec) nounwind {
-; CHECK-LABEL: extract_v4i32_nxv4i32_idx1:
+define <4 x i32> @extract_v4i32_nxv4i32_idx4(<vscale x 4 x i32> %vec) nounwind {
+; CHECK-LABEL: extract_v4i32_nxv4i32_idx4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntw x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #4
+; CHECK-NEXT:    cmp x9, #4 // =4
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #4]
+; CHECK-NEXT:    lsl x8, x8, #2
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 1)
+  %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 4)
   ret <4 x i32> %retval
 }
 
@@ -64,18 +78,25 @@
 }
 
 ; Goes through memory currently; idx != 0.
-define <8 x i16> @extract_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec) nounwind {
-; CHECK-LABEL: extract_v8i16_nxv8i16_idx1:
+define <8 x i16> @extract_v8i16_nxv8i16_idx8(<vscale x 8 x i16> %vec) nounwind {
+; CHECK-LABEL: extract_v8i16_nxv8i16_idx8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cnth x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    cmp x9, #8 // =8
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #2]
+; CHECK-NEXT:    lsl x8, x8, #1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 1)
+  %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 8)
   ret <8 x i16> %retval
 }
 
@@ -90,18 +111,24 @@
 }
 
 ; Goes through memory currently; idx != 0.
-define <16 x i8> @extract_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec) nounwind {
-; CHECK-LABEL: extract_v16i8_nxv16i8_idx1:
+define <16 x i8> @extract_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec) nounwind {
+; CHECK-LABEL: extract_v16i8_nxv16i8_idx16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    rdvl x9, #1
+; CHECK-NEXT:    sub x9, x9, #1 // =1
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov w8, #16
+; CHECK-NEXT:    cmp x9, #16 // =16
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #1]
+; CHECK-NEXT:    csel x8, x9, x8, lo
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 1)
+  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 16)
   ret <16 x i8> %retval
 }
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -17,19 +17,26 @@
   ret <vscale x 2 x i64> %retval
 }
 
-define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
-; CHECK-LABEL: insert_v2i64_nxv2i64_idx1:
+define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx2(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
+; CHECK-LABEL: insert_v2i64_nxv2i64_idx2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntd x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #2
+; CHECK-NEXT:    cmp x9, #2 // =2
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    lsl x8, x8, #3
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #8]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 1)
+  %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 2)
   ret <vscale x 2 x i64> %retval
 }
 
@@ -49,19 +56,26 @@
   ret <vscale x 4 x i32> %retval
 }
 
-define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
-; CHECK-LABEL: insert_v4i32_nxv4i32_idx1:
+define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx4(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
+; CHECK-LABEL: insert_v4i32_nxv4i32_idx4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntw x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #4
+; CHECK-NEXT:    cmp x9, #4 // =4
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    lsl x8, x8, #2
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #4]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 1)
+  %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 4)
   ret <vscale x 4 x i32> %retval
 }
 
@@ -81,19 +95,26 @@
   ret <vscale x 8 x i16> %retval
 }
 
-define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
-; CHECK-LABEL: insert_v8i16_nxv8i16_idx1:
+define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx8(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
+; CHECK-LABEL: insert_v8i16_nxv8i16_idx8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cnth x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    cmp x9, #8 // =8
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    lsl x8, x8, #1
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #2]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 1)
+  %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 8)
   ret <vscale x 8 x i16> %retval
 }
 
@@ -113,19 +134,25 @@
   ret <vscale x 16 x i8> %retval
 }
 
-define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
-; CHECK-LABEL: insert_v16i8_nxv16i8_idx1:
+define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
+; CHECK-LABEL: insert_v16i8_nxv16i8_idx16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    rdvl x9, #1
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #16
+; CHECK-NEXT:    cmp x9, #16 // =16
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    csel x8, x9, x8, lo
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #1]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 1)
+  %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 16)
   ret <vscale x 16 x i8> %retval
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
@@ -297,23 +297,6 @@
   ret void
 }
 
-define void @extract_v8i1_nxv2i1_2(<vscale x 2 x i1> %x, <8 x i1>* %y) {
-; CHECK-LABEL: extract_v8i1_nxv2i1_2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv.v.i v25, 0
-; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v25, 2
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vmsne.vi v25, v25, 0
-; CHECK-NEXT:    vse1.v v25, (a0)
-; CHECK-NEXT:    ret
-  %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
-  store <8 x i1> %c, <8 x i1>* %y
-  ret void
-}
-
 define void @extract_v8i1_nxv64i1_0(<vscale x 64 x i1> %x, <8 x i1>* %y) {
 ; CHECK-LABEL: extract_v8i1_nxv64i1_0:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -68,31 +68,6 @@
   ret <vscale x 8 x i32> %v
 }
 
-define <vscale x 8 x i32> @insert_nxv8i32_v8i32_4(<vscale x 8 x i32> %vec, <8 x i32>* %svp) {
-; LMULMAX2-LABEL: insert_nxv8i32_v8i32_4:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX2-NEXT:    vle32.v v28, (a0)
-; LMULMAX2-NEXT:    vsetivli zero, 12, e32, m4, tu, mu
-; LMULMAX2-NEXT:    vslideup.vi v8, v28, 4
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: insert_nxv8i32_v8i32_4:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-NEXT:    vle32.v v28, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle32.v v12, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 8, e32, m4, tu, mu
-; LMULMAX1-NEXT:    vslideup.vi v8, v28, 4
-; LMULMAX1-NEXT:    vsetivli zero, 12, e32, m4, tu, mu
-; LMULMAX1-NEXT:    vslideup.vi v8, v12, 8
-; LMULMAX1-NEXT:    ret
-  %sv = load <8 x i32>, <8 x i32>* %svp
-  %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 4)
-  ret <vscale x 8 x i32> %v
-}
-
 define <vscale x 8 x i32> @insert_nxv8i32_v8i32_8(<vscale x 8 x i32> %vec, <8 x i32>* %svp) {
 ; LMULMAX2-LABEL: insert_nxv8i32_v8i32_8:
 ; LMULMAX2:       # %bb.0:
@@ -509,28 +484,6 @@
   ret <vscale x 2 x i1> %c
 }
 
-define <vscale x 2 x i1> @insert_nxv2i1_v4i1_6(<vscale x 2 x i1> %v, <4 x i1>* %svp) {
-; CHECK-LABEL: insert_nxv2i1_v4i1_6:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT:    vle1.v v27, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv.v.i v25, 0
-; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv.v.i v26, 0
-; CHECK-NEXT:    vmv1r.v v0, v27
-; CHECK-NEXT:    vmerge.vim v26, v26, 1, v0
-; CHECK-NEXT:    vsetivli zero, 10, e8, mf4, tu, mu
-; CHECK-NEXT:    vslideup.vi v25, v26, 6
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vmsne.vi v0, v25, 0
-; CHECK-NEXT:    ret
-  %sv = load <4 x i1>, <4 x i1>* %svp
-  %c = call <vscale x 2 x i1> @llvm.experimental.vector.insert.v4i1.nxv2i1(<vscale x 2 x i1> %v, <4 x i1> %sv, i64 6)
-  ret <vscale x 2 x i1> %c
-}
-
 define <vscale x 8 x i1> @insert_nxv8i1_v4i1_0(<vscale x 8 x i1> %v, <8 x i1>* %svp) {
 ; CHECK-LABEL: insert_nxv8i1_v4i1_0:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
@@ -289,7 +289,7 @@
 ; CHECK-NEXT: ret
   %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %2 = tail call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef,
-    <2 x i64> , i64 1)
+    <2 x i64> , i64 2)
   %3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %2 , i64 0)
   %4 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
   %5 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %3, <vscale x 2 x i64> %4)
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll b/llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll
--- a/llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll
@@ -100,29 +100,6 @@
   ret <3 x i32> %1
 }
 
-; ============================================================================ ;
-; Invalid canonicalizations
-; ============================================================================ ;
-
-; Idx must be the be a constant multiple of the destination vector's length,
-; otherwise the result is undefined.
-define <4 x i32> @idx_not_constant_multiple(<8 x i32> %vec) {
-; CHECK-LABEL: @idx_not_constant_multiple(
-; CHECK-NEXT:    ret <4 x i32> undef
-;
-  %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 1)
-  ret <4 x i32> %1
-}
-
-; If the extraction overruns the vector, the result is undefined.
-define <10 x i32> @extract_overrun(<8 x i32> %vec) {
-; CHECK-LABEL: @extract_overrun(
-; CHECK-NEXT:    ret <10 x i32> undef
-;
-  %1 = call <10 x i32> @llvm.experimental.vector.extract.v10i32.v8i32(<8 x i32> %vec, i64 0)
-  ret <10 x i32> %1
-}
-
 ; ============================================================================ ;
 ; Scalable cases
 ; ============================================================================ ;
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll b/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
--- a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
@@ -108,29 +108,6 @@
   ret <8 x i32> %1
 }
 
-; ============================================================================ ;
-; Invalid canonicalizations
-; ============================================================================ ;
-
-; Idx must be the be a constant multiple of the subvector's minimum vector
-; length, otherwise the result is undefined.
-define <8 x i32> @idx_not_constant_multiple(<8 x i32> %vec, <4 x i32> %subvec) {
-; CHECK-LABEL: @idx_not_constant_multiple(
-; CHECK-NEXT:    ret <8 x i32> undef
-;
-  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 2)
-  ret <8 x i32> %1
-}
-
-; If the insertion overruns the vector, the result is undefined.
-define <8 x i32> @insert_overrun(<8 x i32> %vec, <8 x i32> %subvec) {
-; CHECK-LABEL: @insert_overrun(
-; CHECK-NEXT:    ret <8 x i32> undef
-;
-  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 4)
-  ret <8 x i32> %1
-}
-
 ; ============================================================================ ;
 ; Scalable cases
 ; ============================================================================ ;
diff --git a/llvm/test/Verifier/insert-extract-intrinsics-invalid.ll b/llvm/test/Verifier/insert-extract-intrinsics-invalid.ll
new file mode
--- /dev/null
+++ b/llvm/test/Verifier/insert-extract-intrinsics-invalid.ll
@@ -0,0 +1,72 @@
+; RUN: not opt -verify -S < %s 2>&1 >/dev/null | FileCheck %s
+
+;
+; Test that extractions/insertion indices are validated.
+;
+
+; CHECK: experimental_vector_extract index must be a constant multiple of the result type's known minimum vector length.
+define <4 x i32> @extract_idx_not_constant_multiple(<8 x i32> %vec) {
+  %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 1)
+  ret <4 x i32> %1
+}
+
+; CHECK: experimental_vector_insert index must be a constant multiple of the subvector's known minimum vector length.
+define <8 x i32> @insert_idx_not_constant_multiple(<8 x i32> %vec, <4 x i32> %subvec) {
+  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 2)
+  ret <8 x i32> %1
+}
+
+;
+; Test that extractions/insertions which 'overrun' are captured.
+;
+
+; CHECK: experimental_vector_extract would overrun.
+define <3 x i32> @extract_overrun_fixed_fixed(<8 x i32> %vec) {
+  %1 = call <3 x i32> @llvm.experimental.vector.extract.v8i32.v3i32(<8 x i32> %vec, i64 6)
+  ret <3 x i32> %1
+}
+
+; CHECK: experimental_vector_extract would overrun.
+define <vscale x 3 x i32> @extract_overrun_scalable_scalable(<vscale x 8 x i32> %vec) {
+  %1 = call <vscale x 3 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv3i32(<vscale x 8 x i32> %vec, i64 6)
+  ret <vscale x 3 x i32> %1
+}
+
+; We cannot statically check whether or not an extraction of a fixed vector
+; from a scalable vector would overrun, because we can't compare the sizes of
+; the two. Therefore, this function should not raise verifier errors.
+; CHECK-NOT: experimental_vector_extract would overrun.
+define <3 x i32> @extract_overrun_scalable_fixed(<vscale x 8 x i32> %vec) {
+  %1 = call <3 x i32> @llvm.experimental.vector.extract.nxv8i32.v3i32(<vscale x 8 x i32> %vec, i64 6)
+  ret <3 x i32> %1
+}
+
+; CHECK: subvector operand of experimental_vector_insert would overrun the vector being inserted into.
+define <8 x i32> @insert_overrun_fixed_fixed(<8 x i32> %vec, <3 x i32> %subvec) {
+  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 6)
+  ret <8 x i32> %1
+}
+
+; CHECK: subvector operand of experimental_vector_insert would overrun the vector being inserted into.
+define <vscale x 8 x i32> @insert_overrun_scalable_scalable(<vscale x 8 x i32> %vec, <vscale x 3 x i32> %subvec) {
+  %1 = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv3i32(<vscale x 8 x i32> %vec, <vscale x 3 x i32> %subvec, i64 6)
+  ret <vscale x 8 x i32> %1
+}
+
+; We cannot statically check whether or not an insertion of a fixed vector into
+; a scalable vector would overrun, because we can't compare the sizes of the
+; two. Therefore, this function should not raise verifier errors.
+; CHECK-NOT: subvector operand of experimental_vector_insert would overrun the vector being inserted into.
+define <vscale x 8 x i32> @insert_overrun_scalable_fixed(<vscale x 8 x i32> %vec, <3 x i32> %subvec) {
+  %1 = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.v3i32(<vscale x 8 x i32> %vec, <3 x i32> %subvec, i64 6)
+  ret <vscale x 8 x i32> %1
+}
+
+declare <vscale x 3 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv3i32(<vscale x 8 x i32>, i64)
+declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv3i32(<vscale x 8 x i32>, <vscale x 3 x i32>, i64)
+declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.v3i32(<vscale x 8 x i32>, <3 x i32>, i64)
+declare <3 x i32> @llvm.experimental.vector.extract.nxv8i32.v3i32(<vscale x 8 x i32>, i64)
+declare <3 x i32> @llvm.experimental.vector.extract.v8i32.v3i32(<8 x i32>, i64)
+declare <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32>, i64)
+declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32>, <3 x i32>, i64)
+declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64)
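
For context (not part of the patch itself): a minimal IR sketch of calls that satisfy both of the new verifier rules, i.e. the index is a constant multiple of the result/subvector's known minimum length and the operation stays within the parent vector. The function names @valid_extract and @valid_insert are illustrative only; the intrinsic declarations match those already used in the tests above.

; 4 % 4 == 0 and 4 + 4 <= 8, so neither new verifier check fires.
define <4 x i32> @valid_extract(<8 x i32> %vec) {
  %ext = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 4)
  ret <4 x i32> %ext
}

; 4 % 4 == 0 and 4 + 4 <= 8, so the insertion stays within the parent vector.
define <8 x i32> @valid_insert(<8 x i32> %vec, <4 x i32> %subvec) {
  %ins = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 4)
  ret <8 x i32> %ins
}

declare <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32>, i64)
declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64)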