diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -520,7 +520,9 @@ /// The elements of VECTOR1 starting at IDX are overwritten with VECTOR2. /// Elements IDX through (IDX + num_elements(T) - 1) must be valid VECTOR1 /// indices. If this condition cannot be determined statically but is false at - /// runtime, then the result vector is undefined. + /// runtime, then the result vector is undefined. The IDX parameter must be a + /// vector index constant type, which for most targets will be an integer + /// pointer type. /// /// This operation supports inserting a fixed-width vector into a scalable /// vector, but not the other way around. @@ -1044,7 +1046,8 @@ /// DEBUGTRAP - Trap intended to get the attention of a debugger. DEBUGTRAP, - /// UBSANTRAP - Trap with an immediate describing the kind of sanitizer failure. + /// UBSANTRAP - Trap with an immediate describing the kind of sanitizer + /// failure. UBSANTRAP, /// PREFETCH - This corresponds to a prefetch intrinsic. The first operand diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5595,9 +5595,7 @@ N1VT.getVectorMinNumElements()) && "Extract subvector overflow!"); assert(N2C->getAPIntValue().getBitWidth() == - TLI->getVectorIdxTy(getDataLayout()) - .getSizeInBits() - .getFixedSize() && + TLI->getVectorIdxTy(getDataLayout()).getFixedSizeInBits() && "Constant index for EXTRACT_SUBVECTOR has an invalid size"); // Trivial extraction. 
@@ -5817,6 +5815,9 @@ cast<ConstantSDNode>(N3)->getZExtValue()) <= VT.getVectorMinNumElements()) && "Insert subvector overflow!"); + assert(cast<ConstantSDNode>(N3)->getAPIntValue().getBitWidth() == + TLI->getVectorIdxTy(getDataLayout()).getFixedSizeInBits() && + "Constant index for INSERT_SUBVECTOR has an invalid size"); // Trivial insertion. if (VT == N2VT) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7009,6 +7009,14 @@ SDValue Vec = getValue(I.getOperand(0)); SDValue SubVec = getValue(I.getOperand(1)); SDValue Index = getValue(I.getOperand(2)); + + // The intrinsic's index type is i64, but the SDNode requires an index type + // suitable for the target. Convert the index as required. + MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + if (Index.getValueType() != VectorIdxTy) + Index = DAG.getVectorIdxConstant( + cast<ConstantSDNode>(Index)->getZExtValue(), DL); + EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec, Index)); @@ -7021,6 +7029,13 @@ SDValue Index = getValue(I.getOperand(1)); EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + // The intrinsic's index type is i64, but the SDNode requires an index type + // suitable for the target. Convert the index as required. 
+ MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + if (Index.getValueType() != VectorIdxTy) + Index = DAG.getVectorIdxConstant( + cast<ConstantSDNode>(Index)->getZExtValue(), DL); + setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index)); return; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8023,7 +8023,7 @@ SDLoc DL(V64Reg); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy), - V64Reg, DAG.getConstant(0, DL, MVT::i32)); + V64Reg, DAG.getConstant(0, DL, MVT::i64)); } /// getExtFactor - Determine the adjustment factor for the position when diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -10502,7 +10502,7 @@ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx)))), - (i32 0))), + (i64 0))), (i64 0))))), (EXTRACT_SUBREG (v2i32 (!cast<Instruction>(NAME # v2i32_indexed) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5607,7 +5607,7 @@ // If none did, fallback to the explicit patterns, consuming the vector_extract. 
def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)), - (i32 0)), (i64 0))), + (i64 0)), (i64 0))), (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), ssub)>; @@ -5616,7 +5616,7 @@ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), ssub)>; def : Pat<(i32 (vector_extract (insert_subvector undef, - (v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))), + (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))), (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), ssub)>; @@ -5637,7 +5637,7 @@ // If there is a sign extension after this intrinsic, consume it as smov already // performed it def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, - (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)), + (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)), (i32 (SMOVvi8to32 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), @@ -5649,7 +5649,7 @@ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), (i64 0)))>; def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, - (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)), + (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)), (i32 (SMOVvi16to32 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), @@ -5668,7 +5668,7 @@ // If there is a masking operation keeping only what has been actually // generated, consume it. 
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, - (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)), + (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), @@ -5680,7 +5680,7 @@ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), ssub))>; def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, - (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)), + (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), @@ -6003,7 +6003,7 @@ (v2f32 (AArch64duplane32 (v4f32 (insert_subvector undef, (v2f32 (fneg V64:$Rm)), - (i32 0))), + (i64 0))), VectorIndexS:$idx)))), (FMLSv2i32_indexed V64:$Rd, V64:$Rn, (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), @@ -6024,7 +6024,7 @@ (v4f32 (AArch64duplane32 (v4f32 (insert_subvector undef, (v2f32 (fneg V64:$Rm)), - (i32 0))), + (i64 0))), VectorIndexS:$idx)))), (FMLSv4i32_indexed V128:$Rd, V128:$Rn, (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), @@ -6055,7 +6055,7 @@ def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), (vector_extract (v4f32 (insert_subvector undef, (v2f32 (fneg V64:$Rm)), - (i32 0))), + (i64 0))), VectorIndexS:$idx))), (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll @@ -97,12 +97,12 @@ ; CHECK-LABEL: test_v9i8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: mov v0.b[9], w8 -; CHECK-NEXT: mov v0.b[10], w8 -; CHECK-NEXT: mov v0.b[11], w8 -; CHECK-NEXT: mov v0.b[12], w8 -; CHECK-NEXT: mov v0.b[13], w8 -; CHECK-NEXT: ext 
v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: mov v1.b[9], w8 +; CHECK-NEXT: mov v1.b[10], w8 +; CHECK-NEXT: mov v1.b[11], w8 +; CHECK-NEXT: mov v1.b[13], w8 +; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8 ; CHECK-NEXT: and v1.8b, v0.8b, v1.8b ; CHECK-NEXT: umov w8, v1.b[1] ; CHECK-NEXT: umov w9, v1.b[0] diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll --- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple riscv32 -mattr=+m,+d,+experimental-zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple riscv64 -mattr=+m,+d,+experimental-zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s define <vscale x 4 x i32> @extract_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec) {