This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
mlir/
-
lib/Conversion/GPUCommon/
-
Conversion/
-
GPUCommon/
-
GPUOpsLowering.cpp
-
test/Conversion/
-
Conversion/
-
GPUToNVVM/
-
gpu-to-nvvm.mlir
-
GPUToROCDL/
-
gpu-to-rocdl.mlir

Differential D154269

[mlir][Conversion/GPUCommon] Fix bug in conversion of `math` ops
ClosedPublic

Authored by christopherbate on Jun 30 2023, 3:11 PM.

Download Raw Diff

Details

Reviewers

ThomasRaoux
herhut

Commits

rG14858cf05dc7: [mlir][Conversion/GPUCommon] Fix bug in conversion of `math` ops

Summary

The common GPU operation transformation that lowers math operations
to function calls in the gpu-to-nvvm and gpu-to-rocdl passes handles
vector types by applying the function to each scalar and returning a
new vector. However, a typo discarded the result of each
llvm.insertelement, so the result vector was never accumulated and the
rewrite returned the initial llvm.mlir.undef value instead of the correct
vector. A patch is added and tests are strengthened.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

christopherbate created this revision. · Jun 30 2023, 3:11 PM

Herald added a reviewer: ThomasRaoux. · View Herald Transcript · Jun 30 2023, 3:11 PM

Herald added a project: Restricted Project. · View Herald Transcript

Herald added subscribers: bviyer, Moerafaat, zero9178 and 24 others. · View Herald Transcript

christopherbate requested review of this revision. · Jun 30 2023, 3:11 PM

Herald added a reviewer: herhut. · View Herald Transcript · Jun 30 2023, 3:11 PM

Herald added a project: Restricted Project. · View Herald Transcript

Herald added subscribers: stephenneuendorffer, nicolasvasilache, jholewinski. · View Herald Transcript

Good catch!

This revision is now accepted and ready to land. · Jun 30 2023, 3:22 PM

Harbormaster completed remote builds in B242574: Diff 536452. · Jun 30 2023, 4:57 PM

Closed by commit rG14858cf05dc7: [mlir][Conversion/GPUCommon] Fix bug in conversion of `math` ops (authored by christopherbate). · Explain Why · Jul 3 2023, 12:27 PM

This revision was automatically updated to reflect the committed changes.

christopherbate added a commit: rG14858cf05dc7: [mlir][Conversion/GPUCommon] Fix bug in conversion of `math` ops.

Revision Contents

Path

Size

mlir/

lib/

Conversion/

GPUCommon/

GPUOpsLowering.cpp

4 lines

test/

Conversion/

GPUToNVVM/

gpu-to-nvvm.mlir

14 lines

GPUToROCDL/

gpu-to-rocdl.mlir

14 lines

Diff 536867

mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp

Show First 20 Lines • Show All 479 Lines • ▼ Show 20 Lines	for (int64_t i = 0; i < vectorType.getNumElements(); ++i) {
auto extractElement = [&](Value operand) -> Value {		auto extractElement = [&](Value operand) -> Value {
if (!isa<VectorType>(operand.getType()))		if (!isa<VectorType>(operand.getType()))
return operand;		return operand;
return rewriter.create<LLVM::ExtractElementOp>(loc, operand, index);		return rewriter.create<LLVM::ExtractElementOp>(loc, operand, index);
};		};
auto scalarOperands = llvm::map_to_vector(operands, extractElement);		auto scalarOperands = llvm::map_to_vector(operands, extractElement);
Operation *scalarOp =		Operation *scalarOp =
rewriter.create(loc, name, scalarOperands, elementType, op->getAttrs());		rewriter.create(loc, name, scalarOperands, elementType, op->getAttrs());
rewriter.create<LLVM::InsertElementOp>(loc, result, scalarOp->getResult(0),		result = rewriter.create<LLVM::InsertElementOp>(
index);		loc, result, scalarOp->getResult(0), index);
}		}

rewriter.replaceOp(op, result);		rewriter.replaceOp(op, result);
return success();		return success();
}		}

static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space) {		static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space) {
return IntegerAttr::get(IntegerType::get(ctx, 64), space);		return IntegerAttr::get(IntegerType::get(ctx, 64), space);
Show All 12 Lines

mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir

	Show First 20 Lines • Show All 510 Lines • ▼ Show 20 Lines
	}			}

	// -----			// -----

	gpu.module @test_module {			gpu.module @test_module {
	// CHECK-LABEL: func @gpu_unroll			// CHECK-LABEL: func @gpu_unroll
	func.func @gpu_unroll(%arg0 : vector<4xf32>) -> vector<4xf32> {			func.func @gpu_unroll(%arg0 : vector<4xf32>) -> vector<4xf32> {
	%result = math.exp %arg0 : vector<4xf32>			%result = math.exp %arg0 : vector<4xf32>
	// CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32			// CHECK: %[[V0:.+]] = llvm.mlir.undef : vector<4xf32>
	// CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32			// CHECK: %[[CL:.+]] = llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
	// CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32			// CHECK: %[[V1:.+]] = llvm.insertelement %[[CL]], %[[V0]]
	// CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32			// CHECK: %[[CL:.+]] = llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
				// CHECK: %[[V2:.+]] = llvm.insertelement %[[CL]], %[[V1]]
				// CHECK: %[[CL:.+]] = llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
				// CHECK: %[[V3:.+]] = llvm.insertelement %[[CL]], %[[V2]]
				// CHECK: %[[CL:.+]] = llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
				// CHECK: %[[V4:.+]] = llvm.insertelement %[[CL]], %[[V3]]
				// CHECK: return %[[V4]]
	func.return %result : vector<4xf32>			func.return %result : vector<4xf32>
	}			}
	}			}

	// -----			// -----

	gpu.module @test_module {			gpu.module @test_module {
	// CHECK-LABEL: @kernel_func			// CHECK-LABEL: @kernel_func
	▲ Show 20 Lines • Show All 87 Lines • Show Last 20 Lines

mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir

	Show First 20 Lines • Show All 450 Lines • ▼ Show 20 Lines
	}			}

	// -----			// -----

	gpu.module @test_module {			gpu.module @test_module {
	// CHECK-LABEL: func @gpu_unroll			// CHECK-LABEL: func @gpu_unroll
	func.func @gpu_unroll(%arg0 : vector<4xf32>) -> vector<4xf32> {			func.func @gpu_unroll(%arg0 : vector<4xf32>) -> vector<4xf32> {
	%result = math.exp %arg0 : vector<4xf32>			%result = math.exp %arg0 : vector<4xf32>
	// CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32			// CHECK: %[[V0:.+]] = llvm.mlir.undef : vector<4xf32>
	// CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32			// CHECK: %[[CL:.+]] = llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
	// CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32			// CHECK: %[[V1:.+]] = llvm.insertelement %[[CL]], %[[V0]]
	// CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32			// CHECK: %[[CL:.+]] = llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
				// CHECK: %[[V2:.+]] = llvm.insertelement %[[CL]], %[[V1]]
				// CHECK: %[[CL:.+]] = llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
				// CHECK: %[[V3:.+]] = llvm.insertelement %[[CL]], %[[V2]]
				// CHECK: %[[CL:.+]] = llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
				// CHECK: %[[V4:.+]] = llvm.insertelement %[[CL]], %[[V3]]
				// CHECK: return %[[V4]]
	func.return %result : vector<4xf32>			func.return %result : vector<4xf32>
	}			}
	}			}

	// -----			// -----

	gpu.module @test_module {			gpu.module @test_module {
	// CHECK-LABEL: @kernel_func			// CHECK-LABEL: @kernel_func
	Show All 19 Lines