diff --git a/mlir/include/mlir/Conversion/NeonToLLVM/ConvertNeonToLLVM.h b/mlir/include/mlir/Conversion/NeonToLLVM/ConvertNeonToLLVM.h
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Conversion/NeonToLLVM/ConvertNeonToLLVM.h
@@ -0,0 +1,30 @@
+//===- ConvertNeonToLLVM.h - Conversion Patterns from Neon to LLVM --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_CONVERSION_NEONTOLLVM_CONVERTNEONTOLLVM_H_
+#define MLIR_CONVERSION_NEONTOLLVM_CONVERTNEONTOLLVM_H_
+
+#include <memory>
+
+namespace mlir {
+class LLVMTypeConverter;
+class ModuleOp;
+template <typename T>
+class OperationPass;
+class OwningRewritePatternList;
+
+/// Populate `patterns` with the Neon dialect to LLVM conversion patterns.
+void populateNeonToLLVMConversionPatterns(LLVMTypeConverter &converter,
+                                          OwningRewritePatternList &patterns);
+
+/// Create a module-level pass converting Neon operations to the LLVM dialect.
+std::unique_ptr<OperationPass<ModuleOp>> createConvertNeonToLLVMPass();
+
+} // namespace mlir
+
+#endif // MLIR_CONVERSION_NEONTOLLVM_CONVERTNEONTOLLVM_H_
diff --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h
--- a/mlir/include/mlir/Conversion/Passes.h
+++ b/mlir/include/mlir/Conversion/Passes.h
@@ -20,6 +20,7 @@
 #include "mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h"
 #include "mlir/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.h"
 #include "mlir/Conversion/LinalgToStandard/LinalgToStandard.h"
+#include "mlir/Conversion/NeonToLLVM/ConvertNeonToLLVM.h"
 #include "mlir/Conversion/OpenMPToLLVM/ConvertOpenMPToLLVM.h"
 #include "mlir/Conversion/PDLToPDLInterp/PDLToPDLInterp.h"
 #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -210,6 +210,17 @@
   let dependentDialects = ["spirv::SPIRVDialect"];
 }
 
+//===----------------------------------------------------------------------===//
+// NeonToLLVM
+//===----------------------------------------------------------------------===//
+
+def ConvertNeonToLLVM : Pass<"convert-neon-to-llvm", "ModuleOp"> {
+  let summary = "Convert the operations from the neon dialect into the LLVM "
+                "dialect";
+  let constructor = "mlir::createConvertNeonToLLVMPass()";
+  let dependentDialects = ["LLVM::LLVMDialect", "LLVM::LLVMNeonDialect"];
+}
+
 //===----------------------------------------------------------------------===//
 // OpenMPToLLVM
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/CMakeLists.txt b/mlir/include/mlir/Dialect/CMakeLists.txt
--- a/mlir/include/mlir/Dialect/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/CMakeLists.txt
@@ -4,6 +4,7 @@
 add_subdirectory(GPU)
 add_subdirectory(Linalg)
 add_subdirectory(LLVMIR)
+add_subdirectory(Neon)
 add_subdirectory(OpenACC)
 add_subdirectory(OpenMP)
 add_subdirectory(PDL)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
--- a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
@@ -8,25 +8,32 @@
 mlir_tablegen(LLVMOpsEnums.cpp.inc -gen-enum-defs)
 add_public_tablegen_target(MLIRLLVMOpsIncGen)
 
-add_mlir_dialect(NVVMOps nvvm)
-add_mlir_doc(NVVMOps -gen-dialect-doc NVVMDialect Dialects/)
-add_mlir_dialect(ROCDLOps rocdl)
-add_mlir_doc(ROCDLOps -gen-dialect-doc ROCDLDialect Dialects/)
-
 set(LLVM_TARGET_DEFINITIONS LLVMOps.td)
 mlir_tablegen(LLVMConversions.inc -gen-llvmir-conversions)
 mlir_tablegen(LLVMConversionEnumsToLLVM.inc -gen-enum-to-llvmir-conversions)
 mlir_tablegen(LLVMConversionEnumsFromLLVM.inc -gen-enum-from-llvmir-conversions)
 add_public_tablegen_target(MLIRLLVMConversionsIncGen)
+
+add_mlir_dialect(NVVMOps nvvm)
+add_mlir_doc(NVVMOps -gen-dialect-doc NVVMDialect Dialects/)
 set(LLVM_TARGET_DEFINITIONS NVVMOps.td)
 mlir_tablegen(NVVMConversions.inc -gen-llvmir-conversions)
 add_public_tablegen_target(MLIRNVVMConversionsIncGen)
+
+add_mlir_dialect(ROCDLOps rocdl)
+add_mlir_doc(ROCDLOps -gen-dialect-doc ROCDLDialect Dialects/)
 set(LLVM_TARGET_DEFINITIONS ROCDLOps.td)
 mlir_tablegen(ROCDLConversions.inc -gen-llvmir-conversions)
 add_public_tablegen_target(MLIRROCDLConversionsIncGen)
 
 add_mlir_dialect(LLVMAVX512 llvm_avx512 LLVMAVX512)
-
+add_mlir_doc(LLVMAVX512 -gen-dialect-doc LLVMAVX512 Dialects/)
 set(LLVM_TARGET_DEFINITIONS LLVMAVX512.td)
 mlir_tablegen(LLVMAVX512Conversions.inc -gen-llvmir-conversions)
 add_public_tablegen_target(MLIRLLVMAVX512ConversionsIncGen)
+
+add_mlir_dialect(LLVMNeon llvm_neon LLVMNeon)
+add_mlir_doc(LLVMNeon -gen-dialect-doc LLVMNeon Dialects/)
+set(LLVM_TARGET_DEFINITIONS LLVMNeon.td)
+mlir_tablegen(LLVMNeonConversions.inc -gen-llvmir-conversions)
+add_public_tablegen_target(MLIRLLVMNeonConversionsIncGen)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMNeon.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMNeon.td
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMNeon.td
@@ -0,0 +1,43 @@
+//===-- LLVMNeon.td - LLVMNeon dialect op definitions ------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the basic operations for the LLVMNeon dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVMIR_NEON_OPS
+#define LLVMIR_NEON_OPS
+
+include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
+
+//===----------------------------------------------------------------------===//
+// LLVMNeon dialect definition
+//===----------------------------------------------------------------------===//
+
+def LLVMNeon_Dialect : Dialect {
+  let name = "llvm_neon";
+  let cppNamespace = "::mlir::LLVM";
+}
+
+//----------------------------------------------------------------------------//
+// MLIR LLVM Neon intrinsics using the MLIR LLVM Dialect type system
+//----------------------------------------------------------------------------//
+
+class LLVMNeon_IntrBinaryOverloadedOp<string mnemonic, list<OpTrait> traits = []> :
+  LLVM_IntrOpBase</*Dialect dialect=*/LLVMNeon_Dialect,
+                  /*string opName=*/mnemonic,
+                  /*string enumName=*/"aarch64_neon_" # !subst(".", "_", mnemonic),
+                  /*list<int> overloadedResults=*/[0],
+                  /*list<int> overloadedOperands=*/[], // defined by result overload
+                  /*list<OpTrait> traits=*/traits,
+                  /*int numResults=*/1>;
+
+def LLVM_aarch64_neon_smull :
+  LLVMNeon_IntrBinaryOverloadedOp<"smull">, Arguments<(ins LLVM_Type, LLVM_Type)>;
+
+#endif // LLVMIR_NEON_OPS
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMNeonDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMNeonDialect.h
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMNeonDialect.h
@@ -0,0 +1,24 @@
+//===- LLVMNeonDialect.h - MLIR Dialect for LLVMNeon ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Target dialect for LLVMNeon in MLIR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_LLVMIR_LLVMNEONDIALECT_H_
+#define MLIR_DIALECT_LLVMIR_LLVMNEONDIALECT_H_
+
+#include "mlir/IR/Dialect.h"
+#include "mlir/IR/OpDefinition.h"
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/LLVMIR/LLVMNeon.h.inc"
+
+#include "mlir/Dialect/LLVMIR/LLVMNeonDialect.h.inc"
+
+#endif // MLIR_DIALECT_LLVMIR_LLVMNEONDIALECT_H_
diff --git a/mlir/include/mlir/Dialect/Neon/CMakeLists.txt b/mlir/include/mlir/Dialect/Neon/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Neon/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_mlir_dialect(Neon neon)
+add_mlir_doc(Neon -gen-dialect-doc Neon Dialects/)
diff --git a/mlir/include/mlir/Dialect/Neon/Neon.td b/mlir/include/mlir/Dialect/Neon/Neon.td
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Neon/Neon.td
@@ -0,0 +1,54 @@
+//===-- Neon.td - Neon dialect operation definitions -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the basic operations for the Neon dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NEON_OPS
+#define NEON_OPS
+
+include "mlir/Interfaces/SideEffectInterfaces.td"
+
+//===----------------------------------------------------------------------===//
+// Neon dialect definition
+//===----------------------------------------------------------------------===//
+
+def Neon_Dialect : Dialect {
+  let name = "neon";
+  let cppNamespace = "::mlir::neon";
+}
+
+//===----------------------------------------------------------------------===//
+// Neon op definitions
+//===----------------------------------------------------------------------===//
+
+class Neon_Op<string mnemonic, list<OpTrait> traits = []> :
+  Op<Neon_Dialect, mnemonic, traits> {}
+
+def SMullOp : Neon_Op<"smull", [NoSideEffect,
+  AllTypesMatch<["a", "b"]>,
+  TypesMatchWith<
+    "res has same vector shape and element bitwidth scaled by 2 as a",
+    "a", "res", "$_self.cast<VectorType>().scaleElementBitwidth(2)">]> {
+  let summary = "signed multiply long (widening vector multiply) op";
+  let description = [{
+    Multiplies corresponding signed elements of `a` and `b` into elements of
+    twice the bitwidth. Supported forms:
+      (vector<8xsi8>, vector<8xsi8>) -> (vector<8xsi16>)
+      (vector<4xsi16>, vector<4xsi16>) -> (vector<4xsi32>)
+      (vector<2xsi32>, vector<2xsi32>) -> (vector<2xsi64>)
+  }];
+  let arguments = (ins VectorOfLengthAndType<[8, 4, 2], [SI8, SI16, SI32]>:$a,
+                       VectorOfLengthAndType<[8, 4, 2], [SI8, SI16, SI32]>:$b);
+  let results = (outs VectorOfLengthAndType<[8, 4, 2], [SI16, SI32, SI64]>:$res);
+  let assemblyFormat =
+    "$a `,` $b attr-dict `:` type($a) `to` type($res)";
+}
+
+#endif // NEON_OPS
diff --git a/mlir/include/mlir/Dialect/Neon/NeonDialect.h b/mlir/include/mlir/Dialect/Neon/NeonDialect.h
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Neon/NeonDialect.h
@@ -0,0 +1,25 @@
+//===- NeonDialect.h - MLIR Dialect for Neon --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Target dialect for Neon in MLIR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_NEON_NEONDIALECT_H_
+#define MLIR_DIALECT_NEON_NEONDIALECT_H_
+
+#include "mlir/IR/Dialect.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
+
+#include "mlir/Dialect/Neon/NeonDialect.h.inc"
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/Neon/Neon.h.inc"
+
+#endif // MLIR_DIALECT_NEON_NEONDIALECT_H_
diff --git a/mlir/include/mlir/IR/StandardTypes.h b/mlir/include/mlir/IR/StandardTypes.h
--- a/mlir/include/mlir/IR/StandardTypes.h
+++ b/mlir/include/mlir/IR/StandardTypes.h
@@ -367,6 +367,11 @@
   }
 
   ArrayRef<int64_t> getShape() const;
+
+  /// Get or create a new VectorType with the same shape as `this` and an
+  /// element type of bitwidth scaled by `scale`.
+  /// Return null if the scaled element type cannot be represented.
+  VectorType scaleElementBitwidth(unsigned scale);
 };
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
--- a/mlir/include/mlir/InitAllDialects.h
+++ b/mlir/include/mlir/InitAllDialects.h
@@ -20,9 +20,11 @@
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMAVX512Dialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/LLVMIR/LLVMNeonDialect.h"
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
 #include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
+#include "mlir/Dialect/Neon/NeonDialect.h"
 #include "mlir/Dialect/OpenACC/OpenACC.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/Dialect/PDL/IR/PDL.h"
@@ -49,7 +51,9 @@
                   gpu::GPUDialect,
                   LLVM::LLVMAVX512Dialect,
                   LLVM::LLVMDialect,
+                  LLVM::LLVMNeonDialect,
                   linalg::LinalgDialect,
+                  neon::NeonDialect,
                   scf::SCFDialect,
                   omp::OpenMPDialect,
                   pdl::PDLDialect,
diff --git a/mlir/include/mlir/InitAllTranslations.h b/mlir/include/mlir/InitAllTranslations.h
--- a/mlir/include/mlir/InitAllTranslations.h
+++ b/mlir/include/mlir/InitAllTranslations.h
@@ -23,6 +23,7 @@
 void registerToNVVMIRTranslation();
 void registerToROCDLIRTranslation();
 void registerAVX512ToLLVMIRTranslation();
+void registerNeonToLLVMIRTranslation();
 
 // This function should be called before creating any MLIRContext if one
 // expects all the possible translations to be made available to the context
@@ -36,6 +37,7 @@
     registerToNVVMIRTranslation();
     registerToROCDLIRTranslation();
     registerAVX512ToLLVMIRTranslation();
+    registerNeonToLLVMIRTranslation();
     return true;
   }();
   (void)initOnce;
diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt
--- a/mlir/lib/Conversion/CMakeLists.txt
+++ b/mlir/lib/Conversion/CMakeLists.txt
@@ -9,6 +9,7 @@
 add_subdirectory(LinalgToLLVM)
 add_subdirectory(LinalgToSPIRV)
 add_subdirectory(LinalgToStandard)
+add_subdirectory(NeonToLLVM)
 add_subdirectory(OpenMPToLLVM)
 add_subdirectory(PDLToPDLInterp)
 add_subdirectory(SCFToGPU)
diff --git a/mlir/lib/Conversion/NeonToLLVM/CMakeLists.txt b/mlir/lib/Conversion/NeonToLLVM/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Conversion/NeonToLLVM/CMakeLists.txt
@@ -0,0 +1,19 @@
+add_mlir_conversion_library(MLIRNeonToLLVM
+  ConvertNeonToLLVM.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/NeonToLLVM
+
+  DEPENDS
+  MLIRConversionPassIncGen
+
+  LINK_COMPONENTS
+  Core
+
+  LINK_LIBS PUBLIC
+  MLIRNeon
+  MLIRLLVMNeon
+  MLIRLLVMIR
+  MLIRStandardToLLVM
+  MLIRTransforms
+  )
diff --git a/mlir/lib/Conversion/NeonToLLVM/ConvertNeonToLLVM.cpp b/mlir/lib/Conversion/NeonToLLVM/ConvertNeonToLLVM.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Conversion/NeonToLLVM/ConvertNeonToLLVM.cpp
@@ -0,0 +1,129 @@
+//===- ConvertNeonToLLVM.cpp - Convert Neon to the LLVM dialect -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Conversion/NeonToLLVM/ConvertNeonToLLVM.h"
+
+#include "../PassDetail.h"
+#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
+#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
+#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/LLVMIR/LLVMNeonDialect.h"
+#include "mlir/Dialect/Neon/NeonDialect.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/Dialect/Vector/VectorOps.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Pass/Pass.h"
+
+using namespace mlir;
+using namespace mlir::vector;
+using namespace mlir::neon;
+
+// TODO: Code is currently copy-pasted and adapted from the 1-1 LLVM
+// conversion code. It would be better if it were properly exposed in core and
+// reusable.
+/// Generic one-to-one lowering from a Neon op to an LLVM dialect `TargetOp`.
+/// The result types go through the type converter, whereas the operands and
+/// the attributes of the source op are forwarded unchanged.
+template <typename SourceOp, typename TargetOp>
+static LogicalResult
+matchAndRewriteOneToOne(const ConvertToLLVMPattern &lowering,
+                        LLVMTypeConverter &typeConverter, Operation *op,
+                        ArrayRef<Value> operands,
+                        ConversionPatternRewriter &rewriter) {
+  const unsigned resultCount = op->getNumResults();
+  auto loc = op->getLoc();
+  Type llvmResultType;
+  if (resultCount > 0) {
+    llvmResultType = typeConverter.packFunctionResults(op->getResultTypes());
+    if (!llvmResultType)
+      return failure();
+  }
+  auto lowered =
+      rewriter.create<TargetOp>(loc, llvmResultType, operands, op->getAttrs());
+  // No results: there is nothing to forward, so just drop the source op.
+  if (resultCount == 0) {
+    rewriter.eraseOp(op);
+    return success();
+  }
+  // One result: it is replaced directly by the first result of the new op.
+  Value loweredResult = lowered.getOperation()->getResult(0);
+  if (resultCount == 1) {
+    rewriter.replaceOp(op, loweredResult);
+    return success();
+  }
+  // Several results: extract each one from the structure the new op returns.
+  SmallVector<Value, 4> unpacked;
+  unpacked.reserve(resultCount);
+  for (unsigned idx = 0; idx != resultCount; ++idx) {
+    Type fieldType = typeConverter.convertType(op->getResult(idx).getType());
+    unpacked.push_back(rewriter.create<LLVM::ExtractValueOp>(
+        loc, fieldType, loweredResult, rewriter.getI64ArrayAttr(idx)));
+  }
+  rewriter.replaceOp(op, unpacked);
+  return success();
+}
+
+namespace {
+// TODO: Patterns are too verbose due to the fact that we have 1 op (e.g.
+// SMullOp) and different possible target ops. It would be better to take
+// a Functor so that all these conversions become 1-liners.
+/// Conversion pattern rewriting a Neon SMullOp into LLVM::aarch64_neon_smull.
+struct SMullOpConversion : public ConvertToLLVMPattern {
+  explicit SMullOpConversion(MLIRContext *ctx, LLVMTypeConverter &converter)
+      : ConvertToLLVMPattern(SMullOp::getOperationName(), ctx, converter) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    using Target = LLVM::aarch64_neon_smull;
+    return matchAndRewriteOneToOne<SMullOp, Target>(*this, typeConverter, op,
+                                                    operands, rewriter);
+  }
+};
+
+} // namespace
+
+/// Adds every Neon-to-LLVM conversion pattern of this file to `patterns`.
+void mlir::populateNeonToLLVMConversionPatterns(
+    LLVMTypeConverter &converter, OwningRewritePatternList &patterns) {
+  // clang-format off
+  patterns.insert<SMullOpConversion>(converter.getDialect()->getContext(),
+                                     converter);
+  // clang-format on
+}
+
+namespace {
+/// Module pass lowering Neon dialect ops to the LLVM and LLVMNeon dialects.
+struct ConvertNeonToLLVMPass
+    : public ConvertNeonToLLVMBase<ConvertNeonToLLVMPass> {
+  void runOnOperation() override;
+};
+} // namespace
+
+void ConvertNeonToLLVMPass::runOnOperation() {
+  MLIRContext &context = getContext();
+  // Collect the Neon lowerings along with the Vector and Standard ones.
+  OwningRewritePatternList patterns;
+  LLVMTypeConverter converter(&context);
+  populateNeonToLLVMConversionPatterns(converter, patterns);
+  populateVectorToLLVMConversionPatterns(converter, patterns);
+  populateStdToLLVMConversionPatterns(converter, patterns);
+  ConversionTarget target(context);
+  target.addLegalDialect<LLVM::LLVMDialect>();
+  target.addLegalDialect<LLVM::LLVMNeonDialect>();
+  target.addIllegalDialect<neon::NeonDialect>();
+  if (failed(applyPartialConversion(getOperation(), target,
+                                    std::move(patterns))))
+    signalPassFailure();
+}
+
+std::unique_ptr<OperationPass<ModuleOp>> mlir::createConvertNeonToLLVMPass() {
+  return std::make_unique<ConvertNeonToLLVMPass>();
+}
diff --git a/mlir/lib/Conversion/PassDetail.h b/mlir/lib/Conversion/PassDetail.h
--- a/mlir/lib/Conversion/PassDetail.h
+++ b/mlir/lib/Conversion/PassDetail.h
@@ -27,6 +27,7 @@
 namespace LLVM {
 class LLVMDialect;
 class LLVMAVX512Dialect;
+class LLVMNeonDialect;
 } // end namespace LLVM
 
 namespace NVVM {
diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
@@ -1524,7 +1524,6 @@
 
     auto loc = op.getLoc();
     auto elemType = dstType.getElementType();
-    assert(elemType.isSignlessIntOrIndexOrFloat());
 
     // Single offset can be more efficiently shuffled.
     if (op.offsets().getValue().size() == 1) {
diff --git a/mlir/lib/Dialect/CMakeLists.txt b/mlir/lib/Dialect/CMakeLists.txt
--- a/mlir/lib/Dialect/CMakeLists.txt
+++ b/mlir/lib/Dialect/CMakeLists.txt
@@ -4,6 +4,7 @@
 add_subdirectory(GPU)
 add_subdirectory(Linalg)
 add_subdirectory(LLVMIR)
+add_subdirectory(Neon)
 add_subdirectory(OpenACC)
 add_subdirectory(OpenMP)
 add_subdirectory(PDL)
diff --git a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
--- a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
+++ b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
@@ -49,6 +49,27 @@
   MLIRSideEffectInterfaces
   )
 
+add_mlir_dialect_library(MLIRLLVMNeon
+  IR/LLVMNeonDialect.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/LLVMIR
+
+  DEPENDS
+  MLIRLLVMNeonIncGen
+  MLIRLLVMNeonConversionsIncGen
+  intrinsics_gen
+
+  LINK_COMPONENTS
+  AsmParser
+  Core
+
+  LINK_LIBS PUBLIC
+  MLIRIR
+  MLIRLLVMIR
+  MLIRSideEffectInterfaces
+  )
+
 add_mlir_dialect_library(MLIRNVVMIR
   IR/NVVMDialect.cpp
 
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMNeonDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMNeonDialect.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMNeonDialect.cpp
@@ -0,0 +1,31 @@
+//===- LLVMNeonDialect.cpp - MLIR LLVMNeon ops implementation -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVMNeon dialect and its operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/IntrinsicsAArch64.h"
+
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/LLVMIR/LLVMNeonDialect.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/OpImplementation.h"
+#include "mlir/IR/TypeUtilities.h"
+
+using namespace mlir;
+
+void LLVM::LLVMNeonDialect::initialize() {
+  addOperations<
+#define GET_OP_LIST
+#include "mlir/Dialect/LLVMIR/LLVMNeon.cpp.inc"
+      >(); // Registers every op listed by the generated LLVMNeon.cpp.inc.
+}
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/LLVMIR/LLVMNeon.cpp.inc"
diff --git a/mlir/lib/Dialect/Neon/CMakeLists.txt b/mlir/lib/Dialect/Neon/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/Neon/CMakeLists.txt
@@ -0,0 +1,14 @@
+add_mlir_dialect_library(MLIRNeon
+  IR/NeonDialect.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Neon
+
+  DEPENDS
+  MLIRNeonIncGen
+
+  LINK_LIBS PUBLIC
+  MLIRIR
+  MLIRSideEffectInterfaces
+  MLIRVectorToLLVM
+  )
diff --git a/mlir/lib/Dialect/Neon/IR/NeonDialect.cpp b/mlir/lib/Dialect/Neon/IR/NeonDialect.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/Neon/IR/NeonDialect.cpp
@@ -0,0 +1,29 @@
+//===- NeonDialect.cpp - MLIR Neon ops implementation ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Neon dialect and its operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Neon/NeonDialect.h"
+#include "mlir/Dialect/Vector/VectorOps.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/OpImplementation.h"
+#include "mlir/IR/TypeUtilities.h"
+
+using namespace mlir;
+
+void neon::NeonDialect::initialize() {
+  addOperations<
+#define GET_OP_LIST
+#include "mlir/Dialect/Neon/Neon.cpp.inc"
+      >(); // Registers every op listed by the generated Neon.cpp.inc.
+}
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/Neon/Neon.cpp.inc"
diff --git a/mlir/lib/IR/StandardTypes.cpp b/mlir/lib/IR/StandardTypes.cpp
--- a/mlir/lib/IR/StandardTypes.cpp
+++ b/mlir/lib/IR/StandardTypes.cpp
@@ -289,6 +289,30 @@
 
 ArrayRef<int64_t> VectorType::getShape() const { return getImpl()->getShape(); }
 
+VectorType VectorType::scaleElementBitwidth(unsigned scale) {
+  // A zero scale cannot describe a valid element type.
+  if (!scale)
+    return VectorType();
+  MLIRContext *ctx = getContext();
+  if (auto et = getElementType().dyn_cast<IntegerType>()) {
+    // Widths above IntegerType::kMaxWidth are not representable: return null.
+    unsigned width = et.getWidth();
+    if (width > IntegerType::kMaxWidth / scale)
+      return VectorType();
+    return VectorType::get(
+        getShape(), IntegerType::get(scale * width, et.getSignedness(), ctx));
+  }
+  // Element types that are neither integer nor float also yield null.
+  auto floatTy = getElementType().dyn_cast<FloatType>();
+  if (floatTy && floatTy.isF16() && (scale == 2 || scale == 4))
+    return VectorType::get(getShape(), scale == 2 ? FloatType::getF32(ctx)
+                                                   : FloatType::getF64(ctx));
+  if (floatTy && floatTy.isF32() && scale == 2)
+    return VectorType::get(getShape(), FloatType::getF64(ctx));
+  // TODO: BF16 -> F32 and F64?
+  return VectorType();
+}
+
 //===----------------------------------------------------------------------===//
 // TensorType
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/CMakeLists.txt b/mlir/lib/Target/CMakeLists.txt
--- a/mlir/lib/Target/CMakeLists.txt
+++ b/mlir/lib/Target/CMakeLists.txt
@@ -55,6 +55,25 @@
   MLIRTargetLLVMIRModuleTranslation
   )
 
+add_mlir_translation_library(MLIRTargetNeon
+  LLVMIR/LLVMNeonIntr.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVMIR
+
+  DEPENDS
+  MLIRLLVMNeonConversionsIncGen
+
+  LINK_COMPONENTS
+  Core
+
+  LINK_LIBS PUBLIC
+  MLIRIR
+  MLIRLLVMNeon
+  MLIRLLVMIR
+  MLIRTargetLLVMIRModuleTranslation
+  )
+
 add_mlir_translation_library(MLIRTargetNVVMIR
   LLVMIR/ConvertToNVVMIR.cpp
 
diff --git a/mlir/lib/Target/LLVMIR/LLVMNeonIntr.cpp b/mlir/lib/Target/LLVMIR/LLVMNeonIntr.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Target/LLVMIR/LLVMNeonIntr.cpp
@@ -0,0 +1,63 @@
+//===- LLVMNeonIntr.cpp - Convert MLIR LLVM dialect to LLVM intrinsics ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a translation between the MLIR LLVM and Neon dialects
+// and LLVM IR with Neon intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/LLVMIR/LLVMNeonDialect.h"
+#include "mlir/Target/LLVMIR/ModuleTranslation.h"
+#include "mlir/Translation.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
+
+using namespace mlir;
+
+namespace {
+class LLVMNeonModuleTranslation : public LLVM::ModuleTranslation {
+  friend LLVM::ModuleTranslation;
+
+public:
+  using LLVM::ModuleTranslation::ModuleTranslation;
+
+protected:
+  LogicalResult convertOperation(Operation &opInst,
+                                 llvm::IRBuilder<> &builder) override {
+#include "mlir/Dialect/LLVMIR/LLVMNeonConversions.inc"
+    // Falls through to the base translation unless the code above returned.
+    return LLVM::ModuleTranslation::convertOperation(opInst, builder);
+  }
+};
+
+std::unique_ptr<llvm::Module>
+translateLLVMNeonModuleToLLVMIR(Operation *m, llvm::LLVMContext &llvmContext,
+                                StringRef name) {
+  using Impl = LLVMNeonModuleTranslation; // Neon-aware translation.
+  return LLVM::ModuleTranslation::translateModule<Impl>(m, llvmContext, name);
+}
+} // end namespace
+
+namespace mlir {
+void registerNeonToLLVMIRTranslation() {
+  // Prints the translated module as LLVM IR; fails if translation fails.
+  auto emitLLVMIR = [](ModuleOp module, raw_ostream &output) {
+    llvm::LLVMContext llvmContext;
+    auto llvmModule = translateLLVMNeonModuleToLLVMIR(module, llvmContext,
+                                                      "LLVMDialectModule");
+    if (!llvmModule)
+      return failure();
+    llvmModule->print(output, nullptr);
+    return success();
+  };
+  auto registerDialects = [](DialectRegistry &registry) {
+    registry.insert<LLVM::LLVMNeonDialect, LLVM::LLVMDialect>();
+  };
+  TranslateFromMLIRRegistration reg("neon-mlir-to-llvmir", emitLLVMIR,
+                                    registerDialects);
+}
+} // namespace mlir
diff --git a/mlir/test/Conversion/NeonToLLVM/convert-to-llvm.mlir b/mlir/test/Conversion/NeonToLLVM/convert-to-llvm.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Conversion/NeonToLLVM/convert-to-llvm.mlir
@@ -0,0 +1,21 @@
+// RUN: mlir-opt %s -convert-neon-to-llvm \
+// RUN:   | mlir-opt | FileCheck %s
+
+// CHECK-LABEL: @neon_smull
+func @neon_smull(%a: vector<8xsi8>, %b: vector<8xsi8>)
+    -> (vector<8xsi16>, vector<4xsi32>, vector<2xsi64>) {
+  // CHECK: neon.smull{{.*}}: (!llvm.vec<8 x i8>, !llvm.vec<8 x i8>) -> !llvm.vec<8 x i16>
+  %0 = neon.smull %a, %b : vector<8xsi8> to vector<8xsi16>
+  %00 = vector.extract_strided_slice %0 {offsets = [3], sizes = [4], strides = [1]}:
+    vector<8xsi16> to vector<4xsi16>
+
+  // CHECK: neon.smull{{.*}}: (!llvm.vec<4 x i16>, !llvm.vec<4 x i16>) -> !llvm.vec<4 x i32>
+  %1 = neon.smull %00, %00 : vector<4xsi16> to vector<4xsi32>
+  %11 = vector.extract_strided_slice %1 {offsets = [1], sizes = [2], strides = [1]}:
+    vector<4xsi32> to vector<2xsi32>
+
+  // CHECK: neon.smull{{.*}}: (!llvm.vec<2 x i32>, !llvm.vec<2 x i32>) -> !llvm.vec<2 x i64>
+  %2 = neon.smull %11, %11 : vector<2xsi32> to vector<2xsi64>
+
+  return %0, %1, %2 : vector<8xsi16>, vector<4xsi32>, vector<2xsi64>
+}
diff --git a/mlir/test/Dialect/Neon/roundtrip.mlir b/mlir/test/Dialect/Neon/roundtrip.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Dialect/Neon/roundtrip.mlir
@@ -0,0 +1,20 @@
+// RUN: mlir-opt -verify-diagnostics %s | mlir-opt | FileCheck %s
+
+// CHECK-LABEL: @neon_smull
+func @neon_smull(%a: vector<8xsi8>, %b: vector<8xsi8>)
+    -> (vector<8xsi16>, vector<4xsi32>, vector<2xsi64>) {
+  // CHECK: neon.smull {{.*}}: vector<8xsi8> to vector<8xsi16>
+  %0 = neon.smull %a, %b : vector<8xsi8> to vector<8xsi16>
+  %00 = vector.extract_strided_slice %0 {offsets = [3], sizes = [4], strides = [1]}:
+    vector<8xsi16> to vector<4xsi16>
+
+  // CHECK: neon.smull {{.*}}: vector<4xsi16> to vector<4xsi32>
+  %1 = neon.smull %00, %00 : vector<4xsi16> to vector<4xsi32>
+  %11 = vector.extract_strided_slice %1 {offsets = [1], sizes = [2], strides = [1]}:
+    vector<4xsi32> to vector<2xsi32>
+
+  // CHECK: neon.smull {{.*}}: vector<2xsi32> to vector<2xsi64>
+  %2 = neon.smull %11, %11 : vector<2xsi32> to vector<2xsi64>
+
+  return %0, %1, %2 : vector<8xsi16>, vector<4xsi32>, vector<2xsi64>
+}
diff --git a/mlir/test/Target/neon.mlir b/mlir/test/Target/neon.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Target/neon.mlir
@@ -0,0 +1,25 @@
+// RUN: mlir-opt -verify-diagnostics %s | mlir-opt | mlir-translate --neon-mlir-to-llvmir | FileCheck %s
+
+// CHECK-LABEL: @neon_smull
+llvm.func @neon_smull(%arg0: !llvm.vec<8 x i8>, %arg1: !llvm.vec<8 x i8>) -> !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)> {
+  //      CHECK: %[[V0:.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %{{.*}}, <8 x i8> %{{.*}})
+  // CHECK-NEXT: %[[V00:.*]] = shufflevector <8 x i16> %[[V0]], <8 x i16> %[[V0]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+  %0 = "llvm_neon.smull"(%arg0, %arg1) : (!llvm.vec<8 x i8>, !llvm.vec<8 x i8>) -> !llvm.vec<8 x i16>
+  %1 = llvm.shufflevector %0, %0 [3, 4, 5, 6] : !llvm.vec<8 x i16>, !llvm.vec<8 x i16>
+
+  // CHECK-NEXT: %[[V1:.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %[[V00]], <4 x i16> %[[V00]])
+  // CHECK-NEXT: %[[V11:.*]] = shufflevector <4 x i32> %[[V1]], <4 x i32> %[[V1]], <2 x i32> <i32 1, i32 2>
+  %2 = "llvm_neon.smull"(%1, %1) : (!llvm.vec<4 x i16>, !llvm.vec<4 x i16>) -> !llvm.vec<4 x i32>
+  %3 = llvm.shufflevector %2, %2 [1, 2] : !llvm.vec<4 x i32>, !llvm.vec<4 x i32>
+
+  // CHECK-NEXT: %[[V2:.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %[[V11]], <2 x i32> %[[V11]])
+  %4 = "llvm_neon.smull"(%3, %3) : (!llvm.vec<2 x i32>, !llvm.vec<2 x i32>) -> !llvm.vec<2 x i64>
+
+  %5 = llvm.mlir.undef : !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)>
+  %6 = llvm.insertvalue %0, %5[0] : !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)>
+  %7 = llvm.insertvalue %2, %6[1] : !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)>
+  %8 = llvm.insertvalue %4, %7[2] : !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)>
+
+  //      CHECK: ret { <8 x i16>, <4 x i32>, <2 x i64> }
+  llvm.return %8 : !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)>
+}