diff --git a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
--- a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
@@ -37,6 +37,7 @@
 set(LLVM_TARGET_DEFINITIONS LLVMIntrinsicOps.td)
 mlir_tablegen(LLVMIntrinsicConversions.inc -gen-llvmir-conversions)
+mlir_tablegen(LLVMIntrinsicToLLVMIROpPairs.inc -gen-llvmintrinsic-to-llvmirop-pairs)
 add_public_tablegen_target(MLIRLLVMIntrinsicConversionsIncGen)

 add_mlir_dialect(NVVMOps nvvm)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td
@@ -278,6 +278,7 @@
   string resultPattern = !if(!gt(numResults, 1),
                              LLVM_IntrPatterns.structResult,
                              LLVM_IntrPatterns.result);
+  string id = enumName;
   let llvmBuilder = [{
     llvm::Module *module = builder.GetInsertBlock()->getModule();
     llvm::Function *fn = llvm::Intrinsic::getDeclaration(
diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
--- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
+++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/Support/Error.h"
@@ -654,6 +655,13 @@
   return opcMap.lookup(opcode);
 }

+static StringRef lookupOperationNameFromIntrinsicID(unsigned id) {
+  static const DenseMap<unsigned, StringRef> intrMap = {
+#include "mlir/Dialect/LLVMIR/LLVMIntrinsicToLLVMIROpPairs.inc"
+  };
+  return intrMap.lookup(id);
+}
+
 static ICmpPredicate getICmpPredicate(llvm::CmpInst::Predicate p) {
   switch (p) {
   default:
@@ -952,6 +960,20 @@
     }
     Operation *op;
     if (llvm::Function *callee = ci->getCalledFunction()) {
+      // For all intrinsics, try to generate the corresponding op.
+      if (callee->isIntrinsic()) {
+        auto id = callee->getIntrinsicID();
+        auto opName = lookupOperationNameFromIntrinsicID(id);
+        if (!opName.empty()) {
+          OperationState state(loc, opName);
+          state.addOperands(ops);
+          state.addTypes(tys);
+          Operation *op = b.create(state);
+          if (!inst->getType()->isVoidTy())
+            instMap[inst] = op->getResult(0);
+          return success();
+        }
+      }
       op = b.create<CallOp>(
           loc, tys, SymbolRefAttr::get(b.getContext(), callee->getName()), ops);
     } else {
@@ -1139,6 +1161,13 @@
   if (!functionType)
     return failure();

+  if (f->isIntrinsic()) {
+    auto opName = lookupOperationNameFromIntrinsicID(f->getIntrinsicID());
+    // Skip the intrinsic declaration if we found a corresponding op.
+ if (!opName.empty()) + return success(); + } + b.setInsertionPoint(module.getBody(), getFuncInsertPt()); LLVMFuncOp fop = b.create(UnknownLoc::get(context), f->getName(), functionType, diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll new file mode 100644 --- /dev/null +++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll @@ -0,0 +1,716 @@ +; RUN: mlir-translate -import-llvm %s | FileCheck %s + +define void @intrinsics(float %0, float %1, <8 x float> %2, i8* %3) { + %5 = call float @llvm.fmuladd.f32(float %0, float %1, float %0) + %6 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %2, <8 x float> %2, <8 x float> %2) + %7 = call float @llvm.fma.f32(float %0, float %1, float %0) + %8 = call <8 x float> @llvm.fma.v8f32(<8 x float> %2, <8 x float> %2, <8 x float> %2) + call void @llvm.prefetch.p0i8(i8* %3, i32 0, i32 3, i32 1) + ret void +} + +; CHECK-LABEL: llvm.func @exp_test(%arg0: f32, %arg1: vector<8xf32>) +define void @exp_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.exp"(%arg0) : (f32) -> f32 + ; CHECK: "llvm.intr.exp"(%arg1) : (vector<8xf32>) -> vector<8xf32> + %3 = call float @llvm.exp.f32(float %0) + %4 = call <8 x float> @llvm.exp.v8f32(<8 x float> %1) + ret void +} + +; CHECK-LABEL: llvm.func @exp2_test(%arg0: f32, %arg1: vector<8xf32>) +define void @exp2_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.exp2"(%arg0) : (f32) -> f32 + ; CHECK: "llvm.intr.exp2"(%arg1) : (vector<8xf32>) -> vector<8xf32> + %3 = call float @llvm.exp2.f32(float %0) + %4 = call <8 x float> @llvm.exp2.v8f32(<8 x float> %1) + ret void +} + +; CHECK-LABEL: llvm.func @log_test(%arg0: f32, %arg1: vector<8xf32>) +define void @log_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.log"(%arg0) : (f32) -> f32 + ; CHECK: "llvm.intr.log"(%arg1) : (vector<8xf32>) -> vector<8xf32> + %3 = call float @llvm.log.f32(float %0) + %4 = call <8 x float> @llvm.log.v8f32(<8 x float> %1) + ret void +} + +; CHECK-LABEL: llvm.func @log10_test(%arg0: f32, %arg1: vector<8xf32>) +define void @log10_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.log10"(%arg0) : (f32) -> f32 + ; CHECK: "llvm.intr.log10"(%arg1) : (vector<8xf32>) -> vector<8xf32> + %3 = call float @llvm.log10.f32(float %0) + %4 = call <8 x float> @llvm.log10.v8f32(<8 x float> %1) + ret void +} + +; CHECK-LABEL: llvm.func @log2_test(%arg0: f32, %arg1: vector<8xf32>) +define void @log2_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.log2"(%arg0) : (f32) -> f32 + ; CHECK: "llvm.intr.log2"(%arg1) : (vector<8xf32>) -> vector<8xf32> + %3 = call float @llvm.log2.f32(float %0) + %4 = call <8 x float> @llvm.log2.v8f32(<8 x float> %1) + ret void +} + +; CHECK-LABEL: llvm.func @fabs_test(%arg0: f32, %arg1: vector<8xf32>) +define void @fabs_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.fabs"(%arg0) : (f32) -> f32 + ; CHECK: "llvm.intr.fabs"(%arg1) : (vector<8xf32>) -> vector<8xf32> + %3 = call float @llvm.fabs.f32(float %0) + %4 = call <8 x float> @llvm.fabs.v8f32(<8 x float> %1) + ret void +} +; CHECK-LABEL: llvm.func @sqrt_test(%arg0: f32, %arg1: vector<8xf32>) +define void @sqrt_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.sqrt"(%arg0) : (f32) -> f32 + ; CHECK: "llvm.intr.sqrt"(%arg1) : (vector<8xf32>) -> vector<8xf32> + %3 = call float @llvm.sqrt.f32(float %0) + %4 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %1) + ret void +} +; CHECK-LABEL: llvm.func @ceil_test(%arg0: f32, %arg1: vector<8xf32>) +define void @ceil_test(float %0, <8 x float> %1) { + ; CHECK: 
"llvm.intr.ceil"(%arg0) : (f32) -> f32 + ; CHECK: "llvm.intr.ceil"(%arg1) : (vector<8xf32>) -> vector<8xf32> + %3 = call float @llvm.ceil.f32(float %0) + %4 = call <8 x float> @llvm.ceil.v8f32(<8 x float> %1) + ret void +} +; CHECK-LABEL: llvm.func @floor_test(%arg0: f32, %arg1: vector<8xf32>) +define void @floor_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.floor"(%arg0) : (f32) -> f32 + ; CHECK: "llvm.intr.floor"(%arg1) : (vector<8xf32>) -> vector<8xf32> + %3 = call float @llvm.floor.f32(float %0) + %4 = call <8 x float> @llvm.floor.v8f32(<8 x float> %1) + ret void +} +; CHECK-LABEL: llvm.func @cos_test(%arg0: f32, %arg1: vector<8xf32>) +define void @cos_test(float %0, <8 x float> %1) { + ; CHECK: "llvm.intr.cos"(%arg0) : (f32) -> f32 + ; CHECK: "llvm.intr.cos"(%arg1) : (vector<8xf32>) -> vector<8xf32> + %3 = call float @llvm.cos.f32(float %0) + %4 = call <8 x float> @llvm.cos.v8f32(<8 x float> %1) + ret void +} + +; CHECK-LABEL: llvm.func @copysign_test(%arg0: f32, %arg1: f32, %arg2: vector<8xf32>, %arg3: vector<8xf32>) +define void @copysign_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { + ; CHECK: "llvm.intr.copysign"(%arg0, %arg1) : (f32, f32) -> f32 + ; CHECK: "llvm.intr.copysign"(%arg2, %arg3) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + %5 = call float @llvm.copysign.f32(float %0, float %1) + %6 = call <8 x float> @llvm.copysign.v8f32(<8 x float> %2, <8 x float> %3) + ret void +} +; CHECK-LABEL: llvm.func @pow_test(%arg0: f32, %arg1: f32, %arg2: vector<8xf32>, %arg3: vector<8xf32>) +define void @pow_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { + ; CHECK: "llvm.intr.pow"(%arg0, %arg1) : (f32, f32) -> f32 + ; CHECK: "llvm.intr.pow"(%arg2, %arg3) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + %5 = call float @llvm.pow.f32(float %0, float %1) + %6 = call <8 x float> @llvm.pow.v8f32(<8 x float> %2, <8 x float> %3) + ret void +} +; CHECK-LABEL: llvm.func @bitreverse_test(%arg0: i32, %arg1: vector<8xi32>) +define void @bitreverse_test(i32 %0, <8 x i32> %1) { + ; CHECK: "llvm.intr.bitreverse"(%arg0) : (i32) -> i32 + ; CHECK: "llvm.intr.bitreverse"(%arg1) : (vector<8xi32>) -> vector<8xi32> + %3 = call i32 @llvm.bitreverse.i32(i32 %0) + %4 = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %1) + ret void +} +; CHECK-LABEL: llvm.func @ctlz_test(%arg0: i32, %arg1: vector<8xi32>) +define void @ctlz_test(i32 %0, <8 x i32> %1) { + ; CHECK-DAG: %[[falseval1:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK-DAG: %[[falseval2:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: "llvm.intr.ctlz"(%arg0, %[[falseval2]]) : (i32, i1) -> i32 + ; CHECK: "llvm.intr.ctlz"(%arg1, %[[falseval1]]) : (vector<8xi32>, i1) -> vector<8xi32> + %3 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) + %4 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %1, i1 false) + ret void +} +; CHECK-LABEL: llvm.func @cttz_test(%arg0: i32, %arg1: vector<8xi32>) +define void @cttz_test(i32 %0, <8 x i32> %1) { + ; CHECK-DAG: %[[falseval1:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK-DAG: %[[falseval2:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: "llvm.intr.cttz"(%arg0, %[[falseval2]]) : (i32, i1) -> i32 + ; CHECK: "llvm.intr.cttz"(%arg1, %[[falseval1]]) : (vector<8xi32>, i1) -> vector<8xi32> + %3 = call i32 @llvm.cttz.i32(i32 %0, i1 false) + %4 = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %1, i1 false) + ret void +} + +; CHECK-LABEL: llvm.func @ctpop_test(%arg0: i32, %arg1: vector<8xi32>) +define void @ctpop_test(i32 %0, <8 x i32> %1) { + ; CHECK: "llvm.intr.ctpop"(%arg0) : (i32) -> i32 + ; CHECK: 
"llvm.intr.ctpop"(%arg1) : (vector<8xi32>) -> vector<8xi32> + %3 = call i32 @llvm.ctpop.i32(i32 %0) + %4 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %1) + ret void +} + +; CHECK-LABEL: llvm.func @maximum_test(%arg0: f32, %arg1: f32, %arg2: vector<8xf32>, %arg3: vector<8xf32>) +define void @maximum_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { + ; CHECK: "llvm.intr.maximum"(%arg0, %arg1) : (f32, f32) -> f32 + ; CHECK: "llvm.intr.maximum"(%arg2, %arg3) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + %5 = call float @llvm.maximum.f32(float %0, float %1) + %6 = call <8 x float> @llvm.maximum.v8f32(<8 x float> %2, <8 x float> %3) + ret void +} + +; CHECK-LABEL: llvm.func @minimum_test(%arg0: f32, %arg1: f32, %arg2: vector<8xf32>, %arg3: vector<8xf32>) +define void @minimum_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { + ; CHECK: "llvm.intr.minimum"(%arg0, %arg1) : (f32, f32) -> f32 + ; CHECK: "llvm.intr.minimum"(%arg2, %arg3) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + %5 = call float @llvm.minimum.f32(float %0, float %1) + %6 = call <8 x float> @llvm.minimum.v8f32(<8 x float> %2, <8 x float> %3) + ret void +} + +; CHECK-LABEL: llvm.func @maxnum_test(%arg0: f32, %arg1: f32, %arg2: vector<8xf32>, %arg3: vector<8xf32>) +define void @maxnum_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { + ; CHECK: "llvm.intr.maxnum"(%arg0, %arg1) : (f32, f32) -> f32 + ; CHECK: "llvm.intr.maxnum"(%arg2, %arg3) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + %5 = call float @llvm.maxnum.f32(float %0, float %1) + %6 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %2, <8 x float> %3) + ret void +} + +; CHECK-LABEL: llvm.func @minnum_test(%arg0: f32, %arg1: f32, %arg2: vector<8xf32>, %arg3: vector<8xf32>) +define void @minnum_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { + ; CHECK: "llvm.intr.minnum"(%arg0, %arg1) : (f32, f32) -> f32 + ; CHECK: "llvm.intr.minnum"(%arg2, %arg3) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + %5 = call float @llvm.minnum.f32(float %0, float %1) + %6 = call <8 x float> @llvm.minnum.v8f32(<8 x float> %2, <8 x float> %3) + ret void +} + +; CHECK-LABEL: llvm.func @smax_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) +define void @smax_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + ; CHECK: "llvm.intr.smax"(%arg0, %arg1) : (i32, i32) -> i32 + ; CHECK: "llvm.intr.smax"(%arg2, %arg3) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> + %5 = call i32 @llvm.smax.i32(i32 %0, i32 %1) + %6 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} + +; CHECK-LABEL: llvm.func @smin_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) +define void @smin_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + ; CHECK: "llvm.intr.smin"(%arg0, %arg1) : (i32, i32) -> i32 + ; CHECK: "llvm.intr.smin"(%arg2, %arg3) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> + %5 = call i32 @llvm.smin.i32(i32 %0, i32 %1) + %6 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} + +; CHECK-LABEL: llvm.func @umax_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) +define void @umax_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + ; CHECK: "llvm.intr.umax"(%arg0, %arg1) : (i32, i32) -> i32 + ; CHECK: "llvm.intr.umax"(%arg2, %arg3) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> + %5 = call i32 @llvm.umax.i32(i32 %0, i32 %1) + %6 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} + +; CHECK-LABEL: llvm.func 
@umin_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) { +define void @umin_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + ; CHECK: "llvm.intr.umin"(%arg0, %arg1) : (i32, i32) -> i32 + ; CHECK: "llvm.intr.umin"(%arg2, %arg3) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> + %5 = call i32 @llvm.umin.i32(i32 %0, i32 %1) + %6 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} +; CHECK-LABEL: llvm.func @vector_reductions(%arg0: f32, %arg1: vector<8xf32>, %arg2: vector<8xi32>) +define void @vector_reductions(float %0, <8 x float> %1, <8 x i32> %2) { + ; CHECK: "llvm.intr.vector.reduce.add"(%arg2) : (vector<8xi32>) -> i32 + ; CHECK: "llvm.intr.vector.reduce.and"(%arg2) : (vector<8xi32>) -> i32 + ; CHECK: "llvm.intr.vector.reduce.fmax"(%arg1) : (vector<8xf32>) -> f32 + ; CHECK: "llvm.intr.vector.reduce.fmin"(%arg1) : (vector<8xf32>) -> f32 + ; CHECK: "llvm.intr.vector.reduce.mul"(%arg2) : (vector<8xi32>) -> i32 + ; CHECK: "llvm.intr.vector.reduce.or"(%arg2) : (vector<8xi32>) -> i32 + ; CHECK: "llvm.intr.vector.reduce.smax"(%arg2) : (vector<8xi32>) -> i32 + ; CHECK: "llvm.intr.vector.reduce.smin"(%arg2) : (vector<8xi32>) -> i32 + ; CHECK: "llvm.intr.vector.reduce.umax"(%arg2) : (vector<8xi32>) -> i32 + ; CHECK: "llvm.intr.vector.reduce.umin"(%arg2) : (vector<8xi32>) -> i32 + ; CHECK: "llvm.intr.vector.reduce.xor"(%arg2) : (vector<8xi32>) -> i32 + %4 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %2) + %5 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %2) + %6 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %1) + %7 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %1) + %8 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %2) + %9 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %2) + %10 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %2) + %11 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %2) + %12 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %2) + %13 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %2) + ; TODO: vector reduce fadd and fmul should be handled specially. + %14 = call float @llvm.vector.reduce.fadd.v8f32(float %0, <8 x float> %1) + %15 = call float @llvm.vector.reduce.fmul.v8f32(float %0, <8 x float> %1) + %16 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float %0, <8 x float> %1) + %17 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float %0, <8 x float> %1) + %18 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %2) + ret void +} + +; TODO: matrix intrinsic should be handled specially. +define void @matrix_intrinsics(<64 x float> %0, <48 x float> %1, float* %2, i64 %3) { + %5 = call <12 x float> @llvm.matrix.multiply.v12f32.v64f32.v48f32(<64 x float> %0, <48 x float> %1, i32 4, i32 16, i32 3) + %6 = call <48 x float> @llvm.matrix.transpose.v48f32(<48 x float> %1, i32 3, i32 16) + %7 = call <48 x float> @llvm.matrix.column.major.load.v48f32.i64(float* align 4 %2, i64 %3, i1 false, i32 3, i32 16) + call void @llvm.matrix.column.major.store.v48f32.i64(<48 x float> %7, float* align 4 %2, i64 %3, i1 false, i32 3, i32 16) + ret void +} + +; CHECK-LABEL: llvm.func @get_active_lane_mask(%arg0: i64, %arg1: i64) -> vector<7xi1> +define <7 x i1> @get_active_lane_mask(i64 %0, i64 %1) { + ; CHECK: llvm.intr.get.active.lane.mask %arg0, %arg1 : i64, i64 to vector<7xi1> + %3 = call <7 x i1> @llvm.get.active.lane.mask.v7i1.i64(i64 %0, i64 %1) + ret <7 x i1> %3 +} + +; TODO: masked load store intrinsics should be handled specially. 
+define void @masked_load_store_intrinsics(<7 x float>* %0, <7 x i1> %1) { + %3 = call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* %0, i32 1, <7 x i1> %1, <7 x float> undef) + %4 = call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* %0, i32 1, <7 x i1> %1, <7 x float> %3) + call void @llvm.masked.store.v7f32.p0v7f32(<7 x float> %4, <7 x float>* %0, i32 1, <7 x i1> %1) + ret void +} + +; TODO: masked gather scatter intrinsics should be handled specially. +define void @masked_gather_scatter_intrinsics(<7 x float*> %0, <7 x i1> %1) { + %3 = call <7 x float> @llvm.masked.gather.v7f32.v7p0f32(<7 x float*> %0, i32 1, <7 x i1> %1, <7 x float> undef) + %4 = call <7 x float> @llvm.masked.gather.v7f32.v7p0f32(<7 x float*> %0, i32 1, <7 x i1> %1, <7 x float> %3) + call void @llvm.masked.scatter.v7f32.v7p0f32(<7 x float> %4, <7 x float*> %0, i32 1, <7 x i1> %1) + ret void +} + +; CHECK-LABEL: llvm.func @masked_expand_compress_intrinsics(%arg0: !llvm.ptr, %arg1: vector<7xi1>, %arg2: vector<7xf32>) +define void @masked_expand_compress_intrinsics(float* %0, <7 x i1> %1, <7 x float> %2) { + ; CHECK: "llvm.intr.masked.expandload"(%arg0, %arg1, %arg2) : (!llvm.ptr, vector<7xi1>, vector<7xf32>) -> vector<7xf32> + ; CHECK: "llvm.intr.masked.compressstore"(%0, %arg0, %arg1) : (vector<7xf32>, !llvm.ptr, vector<7xi1>) -> () + %4 = call <7 x float> @llvm.masked.expandload.v7f32(float* %0, <7 x i1> %1, <7 x float> %2) + call void @llvm.masked.compressstore.v7f32(<7 x float> %4, float* %0, <7 x i1> %1) + ret void +} + +; CHECK-LABEL: llvm.func @memcpy_test(%arg0: i32, %arg1: !llvm.ptr, %arg2: !llvm.ptr) +define void @memcpy_test(i32 %0, i8* %1, i8* %2) { + ; CHECK: %[[falseval1:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: %[[constant:.+]] = llvm.mlir.constant(10 : i64) : i64 + ; CHECK: %[[falseval2:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: "llvm.intr.memcpy"(%arg1, %arg2, %arg0, %[[falseval2]]) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () + ; CHECK: "llvm.intr.memcpy.inline"(%arg1, %arg2, %[[constant]], %[[falseval1]]) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %2, i32 %0, i1 false) + call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %1, i8* %2, i64 10, i1 false) + ret void +} + +; CHECK-LABEL: llvm.func @memmove_test(%arg0: i32, %arg1: !llvm.ptr, %arg2: !llvm.ptr) +define void @memmove_test(i32 %0, i8* %1, i8* %2) { + ; CHECK: %[[falseval:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: "llvm.intr.memmove"(%arg1, %arg2, %arg0, %[[falseval]]) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () + call void @llvm.memmove.p0i8.p0i8.i32(i8* %1, i8* %2, i32 %0, i1 false) + ret void +} + +; CHECK-LABEL: llvm.func @memset_test(%arg0: i32, %arg1: !llvm.ptr, %arg2: i8) +define void @memset_test(i32 %0, i8* %1, i8 %2) { + ; CHECK: %[[falseval:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: "llvm.intr.memset"(%arg1, %arg2, %arg0, %[[falseval]]) : (!llvm.ptr, i8, i32, i1) -> () + call void @llvm.memset.p0i8.i32(i8* %1, i8 %2, i32 %0, i1 false) + ret void +} + +; CHECK-LABEL: llvm.func @sadd_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) + ; CHECK: "llvm.intr.sadd.with.overflow"(%arg0, %arg1) : (i32, i32) -> !llvm.struct<(i32, i1)> + ; CHECK: "llvm.intr.sadd.with.overflow"(%arg2, %arg3) : (vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> +define void @sadd_with_overflow_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + %5 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %0, i32 %1) + %6 = 
call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} + +; CHECK-LABEL: llvm.func @uadd_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) + ; CHECK: "llvm.intr.uadd.with.overflow"(%arg0, %arg1) : (i32, i32) -> !llvm.struct<(i32, i1)> + ; CHECK: "llvm.intr.uadd.with.overflow"(%arg2, %arg3) : (vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> +define void @uadd_with_overflow_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %0, i32 %1) + %6 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} + +; CHECK-LABEL: llvm.func @ssub_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) + ; CHECK: "llvm.intr.ssub.with.overflow"(%arg0, %arg1) : (i32, i32) -> !llvm.struct<(i32, i1)> + ; CHECK: "llvm.intr.ssub.with.overflow"(%arg2, %arg3) : (vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> +define void @ssub_with_overflow_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + %5 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %0, i32 %1) + %6 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} + +; CHECK-LABEL: llvm.func @usub_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) + ; CHECK: "llvm.intr.usub.with.overflow"(%arg0, %arg1) : (i32, i32) -> !llvm.struct<(i32, i1)> + ; CHECK: "llvm.intr.usub.with.overflow"(%arg2, %arg3) : (vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> +define void @usub_with_overflow_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + %5 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %0, i32 %1) + %6 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} + +; CHECK-LABEL: llvm.func @smul_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) + ; CHECK: "llvm.intr.smul.with.overflow"(%arg0, %arg1) : (i32, i32) -> !llvm.struct<(i32, i1)> + ; CHECK: "llvm.intr.smul.with.overflow"(%arg2, %arg3) : (vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> +define void @smul_with_overflow_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + %5 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %0, i32 %1) + %6 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} + +; CHECK-LABEL: llvm.func @umul_with_overflow_test(%arg0: i32, %arg1: i32, %arg2: vector<8xi32>, %arg3: vector<8xi32>) + ; CHECK: "llvm.intr.umul.with.overflow"(%arg0, %arg1) : (i32, i32) -> !llvm.struct<(i32, i1)> + ; CHECK: "llvm.intr.umul.with.overflow"(%arg2, %arg3) : (vector<8xi32>, vector<8xi32>) -> !llvm.struct<(vector<8xi32>, vector<8xi1>)> +define void @umul_with_overflow_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { + %5 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %0, i32 %1) + %6 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> %2, <8 x i32> %3) + ret void +} + +; TODO : support token type. +; define void @coro_id(i32 %0, i8* %1) { +; %3 = call token @llvm.coro.id(i32 %0, i8* %1, i8* %1, i8* null) +; ret void +; } + +; TODO : support token type. 
+; define void @coro_begin(i32 %0, i8* %1) { +; %3 = call token @llvm.coro.id(i32 %0, i8* %1, i8* %1, i8* null) +; %4 = call i8* @llvm.coro.begin(token %3, i8* %1) +; ret void +; } + +; CHECK-LABEL: llvm.func @coro_size() +define void @coro_size() { + ; CHECK: llvm.intr.coro.size : i64 + ; CHECK: llvm.intr.coro.size : i32 + %1 = call i64 @llvm.coro.size.i64() + %2 = call i32 @llvm.coro.size.i32() + ret void +} +; CHECK-LABEL: llvm.func @coro_align() +define void @coro_align() { + ; CHECK: llvm.intr.coro.align : i64 + ; CHECK: llvm.intr.coro.align : i32 + %1 = call i64 @llvm.coro.align.i64() + %2 = call i32 @llvm.coro.align.i32() + ret void +} + +; TODO : support token type. +; define void @coro_save(i8* %0) { +; %2 = call token @llvm.coro.save(i8* %0) +; ret void +; } + +; TODO : support token type. +; define void @coro_suspend(i32 %0, i1 %1, i8* %2) { +; %4 = call token @llvm.coro.id(i32 %0, i8* %2, i8* %2, i8* null) +; %5 = call i8 @llvm.coro.suspend(token %4, i1 %1) +; ret void +; } + +define void @coro_end(i8* %0, i1 %1) { + call i1 @llvm.coro.end(i8* %0, i1 %1) + ret void +} + +; TODO : support token type. +; define void @coro_free(i32 %0, i8* %1) { +; %3 = call token @llvm.coro.id(i32 %0, i8* %1, i8* %1, i8* null) +; %4 = call i8* @llvm.coro.free(token %3, i8* %1) +; ret void +; } + +; CHECK-LABEL: llvm.func @coro_resume(%arg0: !llvm.ptr) +define void @coro_resume(i8* %0) { + ; CHECK: llvm.intr.coro.resume %arg0 + call void @llvm.coro.resume(i8* %0) + ret void +} + +; CHECK-LABEL: llvm.func @eh_typeid_for(%arg0: !llvm.ptr) { +define void @eh_typeid_for(i8* %0) { + ; CHECK: llvm.intr.eh.typeid.for %arg0 : i32 + %2 = call i32 @llvm.eh.typeid.for(i8* %0) + ret void +} + +; CHECK-LABEL: llvm.func @stack_save() { +define void @stack_save() { + ; CHECK: llvm.intr.stacksave : !llvm.ptr + %1 = call i8* @llvm.stacksave() + ret void +} + +; CHECK-LABEL: llvm.func @stack_restore(%arg0: !llvm.ptr) { +define void @stack_restore(i8* %0) { + ; CHECK: llvm.intr.stackrestore %arg0 + call void @llvm.stackrestore(i8* %0) + ret void +} + +; CHECK-LABEL: llvm.func @vector_predication_intrinsics(%arg0: vector<8xi32>, %arg1: vector<8xi32>, %arg2: vector<8xf32>, %arg3: vector<8xf32>, %arg4: vector<8xi64>, %arg5: vector<8xf64>, %arg6: !llvm.vec<8 x ptr>, %arg7: i32, %arg8: f32, %arg9: !llvm.ptr, %arg10: !llvm.ptr, %arg11: vector<8xi1>, %arg12: i32) +define void @vector_predication_intrinsics(<8 x i32> %0, <8 x i32> %1, <8 x float> %2, <8 x float> %3, <8 x i64> %4, <8 x double> %5, <8 x i32*> %6, i32 %7, float %8, i32* %9, float* %10, <8 x i1> %11, i32 %12) { + ; CHECK: "llvm.intr.vp.add"(%arg0, %arg1, %arg11, %arg12) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.vp.sub"(%arg0, %arg1, %arg11, %arg12) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.vp.mul"(%arg0, %arg1, %arg11, %arg12) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.vp.sdiv"(%arg0, %arg1, %arg11, %arg12) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.vp.udiv"(%arg0, %arg1, %arg11, %arg12) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.vp.srem"(%arg0, %arg1, %arg11, %arg12) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.vp.urem"(%arg0, %arg1, %arg11, %arg12) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.vp.ashr"(%arg0, %arg1, %arg11, %arg12) 
: (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.vp.lshr"(%arg0, %arg1, %arg11, %arg12) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.vp.shl"(%arg0, %arg1, %arg11, %arg12) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.vp.or"(%arg0, %arg1, %arg11, %arg12) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.vp.and"(%arg0, %arg1, %arg11, %arg12) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.vp.xor"(%arg0, %arg1, %arg11, %arg12) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.vp.fadd"(%arg2, %arg3, %arg11, %arg12) : (vector<8xf32>, vector<8xf32>, vector<8xi1>, i32) -> vector<8xf32> + ; CHECK: "llvm.intr.vp.fsub"(%arg2, %arg3, %arg11, %arg12) : (vector<8xf32>, vector<8xf32>, vector<8xi1>, i32) -> vector<8xf32> + ; CHECK: "llvm.intr.vp.fmul"(%arg2, %arg3, %arg11, %arg12) : (vector<8xf32>, vector<8xf32>, vector<8xi1>, i32) -> vector<8xf32> + ; CHECK: "llvm.intr.vp.fdiv"(%arg2, %arg3, %arg11, %arg12) : (vector<8xf32>, vector<8xf32>, vector<8xi1>, i32) -> vector<8xf32> + ; CHECK: "llvm.intr.vp.frem"(%arg2, %arg3, %arg11, %arg12) : (vector<8xf32>, vector<8xf32>, vector<8xi1>, i32) -> vector<8xf32> + ; CHECK: "llvm.intr.vp.fneg"(%arg2, %arg11, %arg12) : (vector<8xf32>, vector<8xi1>, i32) -> vector<8xf32> + ; CHECK: "llvm.intr.vp.fma"(%arg2, %arg3, %arg3, %arg11, %arg12) : (vector<8xf32>, vector<8xf32>, vector<8xf32>, vector<8xi1>, i32) -> vector<8xf32> + ; CHECK: "llvm.intr.vp.reduce.add"(%arg7, %arg0, %arg11, %arg12) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + ; CHECK: "llvm.intr.vp.reduce.mul"(%arg7, %arg0, %arg11, %arg12) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + ; CHECK: "llvm.intr.vp.reduce.and"(%arg7, %arg0, %arg11, %arg12) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + ; CHECK: "llvm.intr.vp.reduce.or"(%arg7, %arg0, %arg11, %arg12) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + ; CHECK: "llvm.intr.vp.reduce.xor"(%arg7, %arg0, %arg11, %arg12) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + ; CHECK: "llvm.intr.vp.reduce.smax"(%arg7, %arg0, %arg11, %arg12) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + ; CHECK: "llvm.intr.vp.reduce.smin"(%arg7, %arg0, %arg11, %arg12) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + ; CHECK: "llvm.intr.vp.reduce.umax"(%arg7, %arg0, %arg11, %arg12) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + ; CHECK: "llvm.intr.vp.reduce.umin"(%arg7, %arg0, %arg11, %arg12) : (i32, vector<8xi32>, vector<8xi1>, i32) -> i32 + ; CHECK: "llvm.intr.vp.reduce.fadd"(%arg8, %arg2, %arg11, %arg12) : (f32, vector<8xf32>, vector<8xi1>, i32) -> f32 + ; CHECK: "llvm.intr.vp.reduce.fmul"(%arg8, %arg2, %arg11, %arg12) : (f32, vector<8xf32>, vector<8xi1>, i32) -> f32 + ; CHECK: "llvm.intr.vp.reduce.fmax"(%arg8, %arg2, %arg11, %arg12) : (f32, vector<8xf32>, vector<8xi1>, i32) -> f32 + ; CHECK: "llvm.intr.vp.reduce.fmin"(%arg8, %arg2, %arg11, %arg12) : (f32, vector<8xf32>, vector<8xi1>, i32) -> f32 + ; CHECK: "llvm.intr.vp.select"(%arg11, %arg0, %arg1, %arg12) : (vector<8xi1>, vector<8xi32>, vector<8xi32>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.vp.merge"(%arg11, %arg0, %arg1, %arg12) : (vector<8xi1>, vector<8xi32>, vector<8xi32>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.vp.store"(%arg0, %arg9, %arg11, %arg12) : (vector<8xi32>, !llvm.ptr, vector<8xi1>, i32) -> () + ; CHECK: "llvm.intr.vp.load"(%arg9, 
%arg11, %arg12) : (!llvm.ptr, vector<8xi1>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.experimental.vp.strided.store"(%arg0, %arg9, %arg7, %arg11, %arg12) : (vector<8xi32>, !llvm.ptr, i32, vector<8xi1>, i32) -> () + ; CHECK: "llvm.intr.experimental.vp.strided.load"(%arg9, %arg7, %arg11, %arg12) : (!llvm.ptr, i32, vector<8xi1>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.vp.trunc"(%arg4, %arg11, %arg12) : (vector<8xi64>, vector<8xi1>, i32) -> vector<8xi32> + ; CHECK: "llvm.intr.vp.zext"(%arg0, %arg11, %arg12) : (vector<8xi32>, vector<8xi1>, i32) -> vector<8xi64> + ; CHECK: "llvm.intr.vp.sext"(%arg0, %arg11, %arg12) : (vector<8xi32>, vector<8xi1>, i32) -> vector<8xi64> + ; CHECK: "llvm.intr.vp.fptrunc"(%arg5, %arg11, %arg12) : (vector<8xf64>, vector<8xi1>, i32) -> vector<8xf32> + ; CHECK: "llvm.intr.vp.fpext"(%arg2, %arg11, %arg12) : (vector<8xf32>, vector<8xi1>, i32) -> vector<8xf64> + ; CHECK: "llvm.intr.vp.fptoui"(%arg5, %arg11, %arg12) : (vector<8xf64>, vector<8xi1>, i32) -> vector<8xi64> + ; CHECK: "llvm.intr.vp.fptosi"(%arg5, %arg11, %arg12) : (vector<8xf64>, vector<8xi1>, i32) -> vector<8xi64> + ; CHECK: "llvm.intr.vp.ptrtoint"(%arg6, %arg11, %arg12) : (!llvm.vec<8 x ptr>, vector<8xi1>, i32) -> vector<8xi64> + ; CHECK: "llvm.intr.vp.inttoptr"(%arg4, %arg11, %arg12) : (vector<8xi64>, vector<8xi1>, i32) -> !llvm.vec<8 x ptr> + %14 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + %15 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + %16 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + %17 = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + %18 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + %19 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + %20 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + %21 = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + %22 = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + %23 = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + %24 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + %25 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + %26 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12) + %27 = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> %2, <8 x float> %3, <8 x i1> %11, i32 %12) + %28 = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> %2, <8 x float> %3, <8 x i1> %11, i32 %12) + %29 = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> %2, <8 x float> %3, <8 x i1> %11, i32 %12) + %30 = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> %2, <8 x float> %3, <8 x i1> %11, i32 %12) + %31 = call <8 x float> @llvm.vp.frem.v8f32(<8 x float> %2, <8 x float> %3, <8 x i1> %11, i32 %12) + %32 = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %2, <8 x i1> %11, i32 %12) + %33 = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %2, <8 x float> %3, <8 x float> %3, <8 x i1> %11, i32 %12) + %34 = call i32 @llvm.vp.reduce.add.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 %12) + %35 = call i32 @llvm.vp.reduce.mul.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 %12) + %36 = call i32 @llvm.vp.reduce.and.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 %12) + %37 = call i32 
@llvm.vp.reduce.or.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 %12) + %38 = call i32 @llvm.vp.reduce.xor.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 %12) + %39 = call i32 @llvm.vp.reduce.smax.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 %12) + %40 = call i32 @llvm.vp.reduce.smin.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 %12) + %41 = call i32 @llvm.vp.reduce.umax.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 %12) + %42 = call i32 @llvm.vp.reduce.umin.v8i32(i32 %7, <8 x i32> %0, <8 x i1> %11, i32 %12) + %43 = call float @llvm.vp.reduce.fadd.v8f32(float %8, <8 x float> %2, <8 x i1> %11, i32 %12) + %44 = call float @llvm.vp.reduce.fmul.v8f32(float %8, <8 x float> %2, <8 x i1> %11, i32 %12) + %45 = call float @llvm.vp.reduce.fmax.v8f32(float %8, <8 x float> %2, <8 x i1> %11, i32 %12) + %46 = call float @llvm.vp.reduce.fmin.v8f32(float %8, <8 x float> %2, <8 x i1> %11, i32 %12) + %47 = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %11, <8 x i32> %0, <8 x i32> %1, i32 %12) + %48 = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %11, <8 x i32> %0, <8 x i32> %1, i32 %12) + call void @llvm.vp.store.v8i32.p0i32(<8 x i32> %0, i32* %9, <8 x i1> %11, i32 %12) + %49 = call <8 x i32> @llvm.vp.load.v8i32.p0i32(i32* %9, <8 x i1> %11, i32 %12) + call void @llvm.experimental.vp.strided.store.v8i32.p0i32.i32(<8 x i32> %0, i32* %9, i32 %7, <8 x i1> %11, i32 %12) + %50 = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0i32.i32(i32* %9, i32 %7, <8 x i1> %11, i32 %12) + %51 = call <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64> %4, <8 x i1> %11, i32 %12) + %52 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> %0, <8 x i1> %11, i32 %12) + %53 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> %0, <8 x i1> %11, i32 %12) + %54 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> %5, <8 x i1> %11, i32 %12) + %55 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> %2, <8 x i1> %11, i32 %12) + %56 = call <8 x i64> @llvm.vp.fptoui.v8i64.v8f64(<8 x double> %5, <8 x i1> %11, i32 %12) + %57 = call <8 x i64> @llvm.vp.fptosi.v8i64.v8f64(<8 x double> %5, <8 x i1> %11, i32 %12) + %58 = call <8 x i64> @llvm.vp.ptrtoint.v8i64.v8p0i32(<8 x i32*> %6, <8 x i1> %11, i32 %12) + %59 = call <8 x i32*> @llvm.vp.inttoptr.v8p0i32.v8i64(<8 x i64> %4, <8 x i1> %11, i32 %12) + ret void +} + +declare float @llvm.fmuladd.f32(float, float, float) +declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>) +declare float @llvm.fma.f32(float, float, float) +declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) +declare void @llvm.prefetch.p0i8(i8* nocapture readonly, i32 immarg, i32 immarg, i32) +declare float @llvm.exp.f32(float) +declare <8 x float> @llvm.exp.v8f32(<8 x float>) +declare float @llvm.exp2.f32(float) +declare <8 x float> @llvm.exp2.v8f32(<8 x float>) +declare float @llvm.log.f32(float) +declare <8 x float> @llvm.log.v8f32(<8 x float>) +declare float @llvm.log10.f32(float) +declare <8 x float> @llvm.log10.v8f32(<8 x float>) +declare float @llvm.log2.f32(float) +declare <8 x float> @llvm.log2.v8f32(<8 x float>) +declare float @llvm.fabs.f32(float) +declare <8 x float> @llvm.fabs.v8f32(<8 x float>) +declare float @llvm.sqrt.f32(float) +declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) +declare float @llvm.ceil.f32(float) +declare <8 x float> @llvm.ceil.v8f32(<8 x float>) +declare float @llvm.floor.f32(float) +declare <8 x float> @llvm.floor.v8f32(<8 x float>) +declare float @llvm.cos.f32(float) +declare <8 x float> @llvm.cos.v8f32(<8 x float>) +declare float 
@llvm.copysign.f32(float, float) +declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>) +declare float @llvm.pow.f32(float, float) +declare <8 x float> @llvm.pow.v8f32(<8 x float>, <8 x float>) +declare i32 @llvm.bitreverse.i32(i32) +declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>) +declare i32 @llvm.ctlz.i32(i32, i1 immarg) +declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1 immarg) +declare i32 @llvm.cttz.i32(i32, i1 immarg) +declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1 immarg) +declare i32 @llvm.ctpop.i32(i32) +declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) +declare float @llvm.maximum.f32(float, float) +declare <8 x float> @llvm.maximum.v8f32(<8 x float>, <8 x float>) +declare float @llvm.minimum.f32(float, float) +declare <8 x float> @llvm.minimum.v8f32(<8 x float>, <8 x float>) +declare float @llvm.maxnum.f32(float, float) +declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) +declare float @llvm.minnum.f32(float, float) +declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) +declare i32 @llvm.smax.i32(i32, i32) +declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) +declare i32 @llvm.smin.i32(i32, i32) +declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>) +declare i32 @llvm.umax.i32(i32, i32) +declare <8 x i32> @llvm.umax.v8i32(<8 x i32>, <8 x i32>) +declare i32 @llvm.umin.i32(i32, i32) +declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>) +declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>) +declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>) +declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>) +declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>) +declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>) +declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>) +declare <12 x float> @llvm.matrix.multiply.v12f32.v64f32.v48f32(<64 x float>, <48 x float>, i32 immarg, i32 immarg, i32 immarg) +declare <48 x float> @llvm.matrix.transpose.v48f32(<48 x float>, i32 immarg, i32 immarg) +declare <48 x float> @llvm.matrix.column.major.load.v48f32.i64(float* nocapture, i64, i1 immarg, i32 immarg, i32 immarg) +declare void @llvm.matrix.column.major.store.v48f32.i64(<48 x float>, float* nocapture writeonly, i64, i1 immarg, i32 immarg, i32 immarg) +declare <7 x i1> @llvm.get.active.lane.mask.v7i1.i64(i64, i64) +declare <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>*, i32 immarg, <7 x i1>, <7 x float>) +declare void @llvm.masked.store.v7f32.p0v7f32(<7 x float>, <7 x float>*, i32 immarg, <7 x i1>) +declare <7 x float> @llvm.masked.gather.v7f32.v7p0f32(<7 x float*>, i32 immarg, <7 x i1>, <7 x float>) +declare void @llvm.masked.scatter.v7f32.v7p0f32(<7 x float>, <7 x float*>, i32 immarg, <7 x i1>) +declare <7 x float> @llvm.masked.expandload.v7f32(float*, <7 x i1>, <7 x float>) +declare void @llvm.masked.compressstore.v7f32(<7 x float>, float*, <7 x i1>) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i32, i1 immarg) +declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64 immarg, i1 immarg) +declare void 
@llvm.memmove.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1 immarg) +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg) +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) +declare { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32>, <8 x i32>) +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) +declare { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32>, <8 x i32>) +declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) +declare { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32>, <8 x i32>) +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) +declare { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32>, <8 x i32>) +declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) +declare { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32>, <8 x i32>) +declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) +declare { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32>, <8 x i32>) +; declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) +; declare i8* @llvm.coro.begin(token, i8* writeonly) +declare i64 @llvm.coro.size.i64() +declare i32 @llvm.coro.size.i32() +declare i64 @llvm.coro.align.i64() +declare i32 @llvm.coro.align.i32() +; declare token @llvm.coro.save(i8*) +; declare i8 @llvm.coro.suspend(token, i1) +declare i1 @llvm.coro.end(i8*, i1) +; declare i8* @llvm.coro.free(token, i8* nocapture readonly) +declare void @llvm.coro.resume(i8*) +declare i32 @llvm.eh.typeid.for(i8*) +declare i8* @llvm.stacksave() +declare void @llvm.stackrestore(i8*) +declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.mul.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.srem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.urem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.shl.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.or.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.and.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.xor.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.fadd.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.fsub.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.fmul.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.fdiv.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.frem.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.fneg.v8f32(<8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32) +declare i32 @llvm.vp.reduce.add.v8i32(i32, <8 x i32>, <8 x i1>, i32) +declare i32 @llvm.vp.reduce.mul.v8i32(i32, <8 x i32>, <8 x i1>, i32) +declare i32 @llvm.vp.reduce.and.v8i32(i32, <8 x i32>, <8 x i1>, i32) +declare i32 @llvm.vp.reduce.or.v8i32(i32, <8 x i32>, <8 x i1>, i32) +declare i32 @llvm.vp.reduce.xor.v8i32(i32, <8 x i32>, <8 x i1>, i32) 
+declare i32 @llvm.vp.reduce.smax.v8i32(i32, <8 x i32>, <8 x i1>, i32) +declare i32 @llvm.vp.reduce.smin.v8i32(i32, <8 x i32>, <8 x i1>, i32) +declare i32 @llvm.vp.reduce.umax.v8i32(i32, <8 x i32>, <8 x i1>, i32) +declare i32 @llvm.vp.reduce.umin.v8i32(i32, <8 x i32>, <8 x i1>, i32) +declare float @llvm.vp.reduce.fadd.v8f32(float, <8 x float>, <8 x i1>, i32) +declare float @llvm.vp.reduce.fmul.v8f32(float, <8 x float>, <8 x i1>, i32) +declare float @llvm.vp.reduce.fmax.v8f32(float, <8 x float>, <8 x i1>, i32) +declare float @llvm.vp.reduce.fmin.v8f32(float, <8 x float>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.select.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) +declare <8 x i32> @llvm.vp.merge.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) +declare void @llvm.vp.store.v8i32.p0i32(<8 x i32>, i32* nocapture, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.load.v8i32.p0i32(i32* nocapture, <8 x i1>, i32) +declare void @llvm.experimental.vp.strided.store.v8i32.p0i32.i32(<8 x i32>, i32* nocapture, i32, <8 x i1>, i32) +declare <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0i32.i32(i32* nocapture, i32, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64>, <8 x i1>, i32) +declare <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32>, <8 x i1>, i32) +declare <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double>, <8 x i1>, i32) +declare <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float>, <8 x i1>, i32) +declare <8 x i64> @llvm.vp.fptoui.v8i64.v8f64(<8 x double>, <8 x i1>, i32) +declare <8 x i64> @llvm.vp.fptosi.v8i64.v8f64(<8 x double>, <8 x i1>, i32) +declare <8 x i64> @llvm.vp.ptrtoint.v8i64.v8p0i32(<8 x i32*>, <8 x i1>, i32) +declare <8 x i32*> @llvm.vp.inttoptr.v8p0i32.v8i64(<8 x i64>, <8 x i1>, i32) diff --git a/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp b/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp --- a/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp +++ b/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp @@ -286,6 +286,20 @@ return false; } +static void emitIntrOpPair(const Record &record, raw_ostream &os) { + auto op = tblgen::Operator(record); + os << "{llvm::Intrinsic::" << record.getValueAsString("id") << ", " + << op.getQualCppClassName() << "::getOperationName()},\n"; +} + +static bool emitIntrOpPairs(const RecordKeeper &recordKeeper, raw_ostream &os) { + for (const auto *def : + recordKeeper.getAllDerivedDefinitions("LLVM_IntrOpBase")) + emitIntrOpPair(*def, os); + + return false; +} + static mlir::GenRegistration genLLVMIRConversions("gen-llvmir-conversions", "Generate LLVM IR conversions", emitBuilders); @@ -299,3 +313,8 @@ genEnumFromLLVMConversion("gen-enum-from-llvmir-conversions", "Generate conversions of EnumAttrs from LLVM IR", emitEnumConversionDefs); + +static mlir::GenRegistration + genLLVMIntrinsicToOpPairs("gen-llvmintrinsic-to-llvmirop-pairs", + "Generate LLVM intrinsic to LLVMIR op pairs", + emitIntrOpPairs);
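Note for reviewers: below is a minimal sketch of the glue the new -gen-llvmintrinsic-to-llvmirop-pairs backend produces and of how the importer uses it. The two sample entries (exp, fabs), the qualified op class names, and the usage snippet are illustrative assumptions derived from emitIntrOpPair above, not a dump of the actual generated file.

  // Illustrative shape of the generated LLVMIntrinsicToLLVMIROpPairs.inc:
  // emitIntrOpPair() prints one brace-initializer pair per op derived from
  // LLVM_IntrOpBase, keyed by the intrinsic enum named in its "id" field.
  {llvm::Intrinsic::exp, ::mlir::LLVM::ExpOp::getOperationName()},
  {llvm::Intrinsic::fabs, ::mlir::LLVM::FAbsOp::getOperationName()},
  // ...

  // These entries expand inside the DenseMap initializer of
  // lookupOperationNameFromIntrinsicID(), so resolving an intrinsic call is a
  // single hash lookup at import time, e.g. for a call to @llvm.exp.f32:
  StringRef opName = lookupOperationNameFromIntrinsicID(llvm::Intrinsic::exp);
  // opName == "llvm.intr.exp"; the importer then builds the generic form
  // checked in the test above: "llvm.intr.exp"(%arg0) : (f32) -> f32

Keying the table by intrinsic ID keeps the importer free of a hand-written switch: any intrinsic op declared via LLVM_IntrOpBase in tablegen is picked up automatically by the import path.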