[mlir][LLVMIR] Add vp.select/vp.merge and VP load/store intrinsic ops

Adds LLVM dialect ops for llvm.vp.select, llvm.vp.merge, llvm.vp.load,
llvm.vp.store and the experimental strided VP load/store intrinsics, and
extends the LLVM IR translation test with a call check for each of them.

NOTE(review): leading whitespace and the position of blank context lines were
reconstructed from the hunk line counts; confirm against the target tree (or
apply with --ignore-whitespace). The pointee type of %fptr and the AnyFloat
argument on the LLVM_VPReductionF context line are presumed -- verify before
applying.

diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
@@ -444,6 +444,11 @@
 
 class LLVM_VPReductionF<string mnem> : LLVM_VPReductionBase<mnem, AnyFloat>;
 
+class LLVM_VPSelectBase<string mnem>
+    : LLVM_OneResultIntrOp<"vp." # mnem, [], [1], [NoSideEffect]>,
+      Arguments<(ins LLVM_VectorOf<I1>:$cond, LLVM_AnyVector:$true_val,
+                     LLVM_AnyVector:$false_val, I32:$evl)>;
+
 // Integer Binary
 def LLVM_VPAddOp : LLVM_VPBinaryI<"add">;
 def LLVM_VPSubOp : LLVM_VPBinaryI<"sub">;
@@ -489,4 +494,30 @@
 def LLVM_VPReduceFMaxOp : LLVM_VPReductionF<"fmax">;
 def LLVM_VPReduceFMinOp : LLVM_VPReductionF<"fmin">;
 
+def LLVM_VPSelectMinOp : LLVM_VPSelectBase<"select">;
+def LLVM_VPMergeMinOp : LLVM_VPSelectBase<"merge">;
+
+// Load/store
+def LLVM_VPLoadOp
+    : LLVM_OneResultIntrOp<"vp.load", [0], [0], []>,
+      Arguments<(ins LLVM_AnyPointer:$ptr,
+                     LLVM_VectorOf<I1>:$mask, I32:$evl)>;
+
+def LLVM_VPStoreOp
+    : LLVM_ZeroResultIntrOp<"vp.store", [0, 1], []>,
+      Arguments<(ins LLVM_AnyVector:$val,
+                     LLVM_AnyPointer:$ptr,
+                     LLVM_VectorOf<I1>:$mask, I32:$evl)>;
+
+// Strided load/store
+def LLVM_VPStridedLoadOp
+    : LLVM_OneResultIntrOp<"experimental.vp.strided.load", [0], [0, 1], []>,
+      Arguments<(ins LLVM_AnyPointer:$ptr, AnyInteger:$stride,
+                     LLVM_VectorOf<I1>:$mask, I32:$evl)>;
+
+def LLVM_VPStridedStoreOp
+    : LLVM_ZeroResultIntrOp<"experimental.vp.strided.store",[0, 1, 2], []>,
+      Arguments<(ins LLVM_AnyVector:$val, LLVM_AnyPointer:$ptr,
+                     AnyInteger:$stride, LLVM_VectorOf<I1>:$mask, I32:$evl)>;
+
 #endif // LLVM_INTRINSIC_OPS
\ No newline at end of file
diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
--- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
@@ -519,6 +519,8 @@
 llvm.func @vector_predication_intrinsics(%A: vector<8xi32>, %B: vector<8xi32>,
                                          %C: vector<8xf32>, %D: vector<8xf32>,
                                          %i: i32, %f: f32,
+                                         %iptr : !llvm.ptr<i32>,
+                                         %fptr : !llvm.ptr<f32>,
                                          %mask: vector<8xi1>, %evl: i32) {
   // CHECK: call <8 x i32> @llvm.vp.add.v8i32
   "llvm.intr.vp.add" (%A, %B, %mask, %evl) :
@@ -623,6 +625,25 @@
   "llvm.intr.vp.reduce.fmin" (%f, %C, %mask, %evl) :
          (f32, vector<8xf32>, vector<8xi1>, i32) -> f32
 
+  // CHECK: call <8 x i32> @llvm.vp.select.v8i32
+  "llvm.intr.vp.select" (%mask, %A, %B, %evl) :
+         (vector<8xi1>, vector<8xi32>, vector<8xi32>, i32) -> vector<8xi32>
+  // CHECK: call <8 x i32> @llvm.vp.merge.v8i32
+  "llvm.intr.vp.merge" (%mask, %A, %B, %evl) :
+         (vector<8xi1>, vector<8xi32>, vector<8xi32>, i32) -> vector<8xi32>
+
+  // CHECK: call void @llvm.vp.store.v8i32.p0i32
+  "llvm.intr.vp.store" (%A, %iptr, %mask, %evl) :
+         (vector<8xi32>, !llvm.ptr<i32>, vector<8xi1>, i32) -> ()
+  // CHECK: call <8 x i32> @llvm.vp.load.v8i32.p0i32
+  "llvm.intr.vp.load" (%iptr, %mask, %evl) :
+         (!llvm.ptr<i32>, vector<8xi1>, i32) -> vector<8xi32>
+  // CHECK: call void @llvm.experimental.vp.strided.store.v8i32.p0i32.i32
+  "llvm.intr.experimental.vp.strided.store" (%A, %iptr, %i, %mask, %evl) :
+         (vector<8xi32>, !llvm.ptr<i32>, i32, vector<8xi1>, i32) -> ()
+  // CHECK: call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0i32.i32
+  "llvm.intr.experimental.vp.strided.load" (%iptr, %i, %mask, %evl) :
+         (!llvm.ptr<i32>, i32, vector<8xi1>, i32) -> vector<8xi32>
   llvm.return
 }
 
@@ -715,3 +736,7 @@
 // CHECK-DAG: declare float @llvm.vp.reduce.fmul.v8f32(float, <8 x float>, <8 x i1>, i32) #0
 // CHECK-DAG: declare float @llvm.vp.reduce.fmax.v8f32(float, <8 x float>, <8 x i1>, i32) #0
 // CHECK-DAG: declare float @llvm.vp.reduce.fmin.v8f32(float, <8 x float>, <8 x i1>, i32) #0
+// CHECK-DAG: declare <8 x i32> @llvm.vp.select.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) #12
+// CHECK-DAG: declare <8 x i32> @llvm.vp.merge.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) #12
+// CHECK-DAG: declare void @llvm.experimental.vp.strided.store.v8i32.p0i32.i32(<8 x i32>, i32* nocapture, i32, <8 x i1>, i32) #4
+// CHECK-DAG: declare <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0i32.i32(i32* nocapture, i32, <8 x i1>, i32) #3