diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5302,23 +5302,40 @@
     break;
   }
   case Intrinsic::experimental_vector_insert: {
-    VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
-    VectorType *SubVecTy = cast<VectorType>(Call.getArgOperand(1)->getType());
+    Value *Vec = Call.getArgOperand(0);
+    Value *SubVec = Call.getArgOperand(1);
+    Value *Idx = Call.getArgOperand(2);
+    unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
+    VectorType *VecTy = cast<VectorType>(Vec->getType());
+    VectorType *SubVecTy = cast<VectorType>(SubVec->getType());
+    VectorType *ResultTy = cast<VectorType>(Call.getType());
+
+    Assert(VecTy == ResultTy,
+           "experimental_vector_insert vector type must match result type.");
     Assert(VecTy->getElementType() == SubVecTy->getElementType(),
            "experimental_vector_insert parameters must have the same element "
            "type.",
            &Call);
+    Assert(IdxN % SubVecTy->getElementCount().getKnownMinValue() == 0,
+           "experimental_vector_insert index must be a constant multiple of "
+           "the subvector's known minimum vector length.");
     break;
   }

   case Intrinsic::experimental_vector_extract: {
+    Value *Vec = Call.getArgOperand(0);
+    Value *Idx = Call.getArgOperand(1);
+    unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
     VectorType *ResultTy = cast<VectorType>(Call.getType());
-    VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
+    VectorType *VecTy = cast<VectorType>(Vec->getType());

     Assert(ResultTy->getElementType() == VecTy->getElementType(),
            "experimental_vector_extract result must have the same element "
            "type as the input vector.",
            &Call);
+    Assert(IdxN % ResultTy->getElementCount().getKnownMinValue() == 0,
+           "experimental_vector_extract index must be a constant multiple of "
+           "the result type's known minimum vector length.");
     break;
   }
   case Intrinsic::experimental_noalias_scope_decl: {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1863,9 +1863,8 @@
     unsigned SubVecNumElts = SubVecTy->getNumElements();
     unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();

-    // The result of this call is undefined if IdxN is not a constant multiple
-    // of the SubVec's minimum vector length OR the insertion overruns Vec.
-    if (IdxN % SubVecNumElts != 0 || IdxN + SubVecNumElts > VecNumElts) {
+    // The result of this call is undefined if the insertion overruns Vec.
+    if (IdxN + SubVecNumElts > VecNumElts) {
       replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
       return eraseInstFromFunction(CI);
     }
@@ -1917,10 +1916,8 @@
     unsigned VecNumElts = VecTy->getNumElements();
     unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();

-    // The result of this call is undefined if IdxN is not a constant multiple
-    // of the result type's minimum vector length OR the extraction overruns
-    // Vec.
-    if (IdxN % DstNumElts != 0 || IdxN + DstNumElts > VecNumElts) {
+    // The result of this call is undefined if the extraction overruns Vec.
+    if (IdxN + DstNumElts > VecNumElts) {
       replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
       return eraseInstFromFunction(CI);
     }
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
--- a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
@@ -17,13 +17,20 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntd x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #2
+; CHECK-NEXT:    cmp x9, #2 // =2
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #8]
+; CHECK-NEXT:    lsl x8, x8, #3
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 1)
+  %retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 2)
   ret <2 x i64> %retval
 }

@@ -43,13 +50,20 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntw x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #4
+; CHECK-NEXT:    cmp x9, #4 // =4
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #4]
+; CHECK-NEXT:    lsl x8, x8, #2
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 1)
+  %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 4)
   ret <4 x i32> %retval
 }

@@ -69,13 +83,20 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cnth x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    cmp x9, #8 // =8
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #2]
+; CHECK-NEXT:    lsl x8, x8, #1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 1)
+  %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 8)
   ret <8 x i16> %retval
 }

@@ -95,13 +116,19 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    rdvl x9, #1
+; CHECK-NEXT:    sub x9, x9, #1 // =1
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov w8, #16
+; CHECK-NEXT:    cmp x9, #16 // =16
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #1]
+; CHECK-NEXT:    csel x8, x9, x8, lo
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 1)
+  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 16)
   ret <16 x i8> %retval
 }

diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -17,19 +17,26 @@
   ret <vscale x 2 x i64> %retval
 }

-define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
-; CHECK-LABEL: insert_v2i64_nxv2i64_idx1:
+define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx2(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
+; CHECK-LABEL: insert_v2i64_nxv2i64_idx2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntd x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #2
+; CHECK-NEXT:    cmp x9, #2 // =2
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    lsl x8, x8, #3
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #8]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 1)
+  %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 2)
   ret <vscale x 2 x i64> %retval
 }

@@ -49,19 +56,26 @@
   ret <vscale x 4 x i32> %retval
 }

-define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
-; CHECK-LABEL: insert_v4i32_nxv4i32_idx1:
+define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx2(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
+; CHECK-LABEL: insert_v4i32_nxv4i32_idx2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntw x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #4
+; CHECK-NEXT:    cmp x9, #4 // =4
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    lsl x8, x8, #2
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #4]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 1)
+  %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 4)
   ret <vscale x 4 x i32> %retval
 }

@@ -81,19 +95,26 @@
   ret <vscale x 8 x i16> %retval
 }

-define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
-; CHECK-LABEL: insert_v8i16_nxv8i16_idx1:
+define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx2(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
+; CHECK-LABEL: insert_v8i16_nxv8i16_idx2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cnth x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    cmp x9, #8 // =8
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    lsl x8, x8, #1
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #2]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 1)
+  %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 8)
   ret <vscale x 8 x i16> %retval
 }

@@ -113,19 +134,25 @@
   ret <vscale x 16 x i8> %retval
 }

-define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
-; CHECK-LABEL: insert_v16i8_nxv16i8_idx1:
+define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx2(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
+; CHECK-LABEL: insert_v16i8_nxv16i8_idx2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    rdvl x9, #1
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #16
+; CHECK-NEXT:    cmp x9, #16 // =16
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    csel x8, x9, x8, lo
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #1]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 1)
+  %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 16)
   ret <vscale x 16 x i8> %retval
 }

diff --git a/llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll b/llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll
--- a/llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll
@@ -100,29 +100,6 @@
   ret <3 x i32> %1
 }

-; ============================================================================ ;
-; Invalid canonicalizations
-; ============================================================================ ;
-
-; Idx must be the be a constant multiple of the destination vector's length,
-; otherwise the result is undefined.
-define <4 x i32> @idx_not_constant_multiple(<8 x i32> %vec) {
-; CHECK-LABEL: @idx_not_constant_multiple(
-; CHECK-NEXT:    ret <4 x i32> undef
-;
-  %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 1)
-  ret <4 x i32> %1
-}
-
-; If the extraction overruns the vector, the result is undefined.
-define <10 x i32> @extract_overrun(<8 x i32> %vec) {
-; CHECK-LABEL: @extract_overrun(
-; CHECK-NEXT:    ret <10 x i32> undef
-;
-  %1 = call <10 x i32> @llvm.experimental.vector.extract.v10i32.v8i32(<8 x i32> %vec, i64 0)
-  ret <10 x i32> %1
-}
-
 ; ============================================================================ ;
 ; Scalable cases
 ; ============================================================================ ;
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll b/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
--- a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
@@ -108,29 +108,6 @@
   ret <8 x i32> %1
 }

-; ============================================================================ ;
-; Invalid canonicalizations
-; ============================================================================ ;
-
-; Idx must be the be a constant multiple of the subvector's minimum vector
-; length, otherwise the result is undefined.
-define <8 x i32> @idx_not_constant_multiple(<8 x i32> %vec, <4 x i32> %subvec) {
-; CHECK-LABEL: @idx_not_constant_multiple(
-; CHECK-NEXT:    ret <8 x i32> undef
-;
-  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 2)
-  ret <8 x i32> %1
-}
-
-; If the insertion overruns the vector, the result is undefined.
-define <8 x i32> @insert_overrun(<8 x i32> %vec, <8 x i32> %subvec) {
-; CHECK-LABEL: @insert_overrun(
-; CHECK-NEXT:    ret <8 x i32> undef
-;
-  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 4)
-  ret <8 x i32> %1
-}
-
 ; ============================================================================ ;
 ; Scalable cases
 ; ============================================================================ ;
diff --git a/llvm/test/Verifier/insert-extract-intrinsics-invalid.ll b/llvm/test/Verifier/insert-extract-intrinsics-invalid.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Verifier/insert-extract-intrinsics-invalid.ll
@@ -0,0 +1,16 @@
+; RUN: not opt -verify -S < %s 2>&1 >/dev/null | FileCheck %s
+
+; CHECK: experimental_vector_extract index must be a constant multiple of the result type's known minimum vector length.
+define <4 x i32> @extract_idx_not_constant_multiple(<8 x i32> %vec) {
+  %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 1)
+  ret <4 x i32> %1
+}
+
+; CHECK: experimental_vector_insert index must be a constant multiple of the subvector's known minimum vector length.
+define <8 x i32> @insert_idx_not_constant_multiple(<8 x i32> %vec, <4 x i32> %subvec) {
+  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 2)
+  ret <8 x i32> %1
+}
+
+declare <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32>, i64)
+declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64)
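For illustration only (not part of the patch): after this change, the multiple-of-minimum-length rule is a hard verifier error, while the overrun rule remains an InstCombine undef fold. A minimal sketch of IR that satisfies both, reusing the same extract intrinsic as the tests above:

; Hypothetical example, not from the patch: index 4 is a multiple of the
; result type's known minimum vector length (4), so the verifier accepts
; the call, and 4 + 4 <= 8 means InstCombine does not fold it to undef.
define <4 x i32> @extract_upper_half(<8 x i32> %vec) {
  %hi = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 4)
  ret <4 x i32> %hi
}

declare <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32>, i64)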