This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
include/clang/Basic/
-
clang/
-
Basic/
-
BuiltinsAMDGPU.def
-
lib/CodeGen/
-
CodeGen/
3
CGBuiltin.cpp
-
test/
-
CodeGenOpenCL/
-
builtins-amdgcn-vi.cl
-
SemaOpenCL/
-
builtins-amdgcn-error.cl

Differential D52320

AMDGPU: add __builtin_amdgcn_update_dpp
ClosedPublic

Authored by yaxunl on Sep 20 2018, 12:46 PM.

Download Raw Diff

Details

Reviewers

kzhuravl
b-sumner
arsenm

Commits

rGaae1e87f4b80: AMDGPU: add __builtin_amdgcn_update_dpp
rL344665: AMDGPU: add __builtin_amdgcn_update_dpp
rC344665: AMDGPU: add __builtin_amdgcn_update_dpp

Summary

Emit llvm.amdgcn.update.dpp for both __builtin_amdgcn_mov_dpp and
__builtin_amdgcn_update_dpp. The first argument to
llvm.amdgcn.update.dpp will be undef for __builtin_amdgcn_mov_dpp.

Diff Detail

Repository: rC Clang

Event Timeline

yaxunl created this revision.Sep 20 2018, 12:46 PM

Herald added subscribers: t-tye, tpr, dstuttard and 3 others. · View Herald TranscriptSep 20 2018, 12:46 PM

arsenm added inline comments.Sep 24 2018, 6:01 AM

lib/CodeGen/CGBuiltin.cpp
11313–11315	The only difference between this and mov_dpp is the argument count and the intrinsic ID, so you can combine the cases

b-sumner added inline comments.Sep 28 2018, 3:47 PM

lib/CodeGen/CGBuiltin.cpp
11313–11315	We should really drop mov_dpp. It will be easier to do so if we keep the cases separate.

arsenm added inline comments.Oct 1 2018, 9:22 PM

lib/CodeGen/CGBuiltin.cpp
11313–11315	We could also emit the mov_dpp with the new intrinsic, using I think undef for the extra operand?

Ping. There's quite a bit of interest in getting this exposed by clang.

Emit llvm.amdgcn.update.dpp for __builtin_amdgcn_mov_dpp.

yaxunl edited the summary of this revision. (Show Details)Oct 16 2018, 1:24 PM

LGTM

This revision is now accepted and ready to land.Oct 16 2018, 2:18 PM

Brian checked, and the extra argument for mov_dpp should be the first one, so mov_dpp(x, ...) --> update_dpp(undef, x, ...). I will fix that when committing.

Closed by commit rC344665: AMDGPU: add __builtin_amdgcn_update_dpp (authored by yaxunl). · Explain WhyOct 16 2018, 7:34 PM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

include/

clang/

Basic/

BuiltinsAMDGPU.def

1 line

lib/

CodeGen/

CGBuiltin.cpp

14 lines

test/

CodeGenOpenCL/

builtins-amdgcn-vi.cl

9 lines

SemaOpenCL/

builtins-amdgcn-error.cl

9 lines

Diff 169942

include/clang/Basic/BuiltinsAMDGPU.def

	Show First 20 Lines • Show All 116 Lines • ▼ Show 20 Lines
	TARGET_BUILTIN(__builtin_amdgcn_cosh, "hh", "nc", "16-bit-insts")			TARGET_BUILTIN(__builtin_amdgcn_cosh, "hh", "nc", "16-bit-insts")
	TARGET_BUILTIN(__builtin_amdgcn_ldexph, "hhi", "nc", "16-bit-insts")			TARGET_BUILTIN(__builtin_amdgcn_ldexph, "hhi", "nc", "16-bit-insts")
	TARGET_BUILTIN(__builtin_amdgcn_frexp_manth, "hh", "nc", "16-bit-insts")			TARGET_BUILTIN(__builtin_amdgcn_frexp_manth, "hh", "nc", "16-bit-insts")
	TARGET_BUILTIN(__builtin_amdgcn_frexp_exph, "sh", "nc", "16-bit-insts")			TARGET_BUILTIN(__builtin_amdgcn_frexp_exph, "sh", "nc", "16-bit-insts")
	TARGET_BUILTIN(__builtin_amdgcn_fracth, "hh", "nc", "16-bit-insts")			TARGET_BUILTIN(__builtin_amdgcn_fracth, "hh", "nc", "16-bit-insts")
	TARGET_BUILTIN(__builtin_amdgcn_classh, "bhi", "nc", "16-bit-insts")			TARGET_BUILTIN(__builtin_amdgcn_classh, "bhi", "nc", "16-bit-insts")
	TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n", "s-memrealtime")			TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n", "s-memrealtime")
	TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nc", "dpp")			TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nc", "dpp")
				TARGET_BUILTIN(__builtin_amdgcn_update_dpp, "iiiIiIiIiIb", "nc", "dpp")
	TARGET_BUILTIN(__builtin_amdgcn_s_dcache_wb, "v", "n", "vi-insts")			TARGET_BUILTIN(__builtin_amdgcn_s_dcache_wb, "v", "n", "vi-insts")

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// GFX9+ only builtins.			// GFX9+ only builtins.
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts")			TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts")

	Show All 38 Lines

lib/CodeGen/CGBuiltin.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 11,304 Lines • ▼ Show 20 Lines

Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,		Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {		const CallExpr *E) {
switch (BuiltinID) {		switch (BuiltinID) {
case AMDGPU::BI__builtin_amdgcn_div_scale:		case AMDGPU::BI__builtin_amdgcn_div_scale:
case AMDGPU::BI__builtin_amdgcn_div_scalef: {		case AMDGPU::BI__builtin_amdgcn_div_scalef: {
// Translate from the intrinsics's struct return to the builtin's out		// Translate from the intrinsics's struct return to the builtin's out
// argument.		// argument.

Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));		Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));

		arsenmUnsubmitted Not Done Reply Inline Actions The only difference between this and mov_dpp is the argument count and the intrinsic ID, so you can combine the cases arsenm: The only difference between this and mov_dpp is the argument count and the intrinsic ID, so you…
		b-sumnerUnsubmitted Not Done Reply Inline Actions We should really drop mov_dpp. It will be easier to do so if we keep the cases separate. b-sumner: We should really drop mov_dpp. It will be easier to do so if we keep the cases separate.
		arsenmUnsubmitted Not Done Reply Inline Actions We could also emit the mov_dpp with the new intrinsic, using I think undef for the extra operand? arsenm: We could also emit the mov_dpp with the new intrinsic, using I think undef for the extra…
llvm::Value *X = EmitScalarExpr(E->getArg(0));		llvm::Value *X = EmitScalarExpr(E->getArg(0));
llvm::Value *Y = EmitScalarExpr(E->getArg(1));		llvm::Value *Y = EmitScalarExpr(E->getArg(1));
llvm::Value *Z = EmitScalarExpr(E->getArg(2));		llvm::Value *Z = EmitScalarExpr(E->getArg(2));

llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,		llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
X->getType());		X->getType());

llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});		llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
Show All 18 Lines	case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,		llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
Src0->getType());		Src0->getType());
llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);		llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});		return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
}		}

case AMDGPU::BI__builtin_amdgcn_ds_swizzle:		case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);		return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
case AMDGPU::BI__builtin_amdgcn_mov_dpp: {		case AMDGPU::BI__builtin_amdgcn_mov_dpp:
llvm::SmallVector<llvm::Value *, 5> Args;		case AMDGPU::BI__builtin_amdgcn_update_dpp: {
for (unsigned I = 0; I != 5; ++I)		llvm::SmallVector<llvm::Value *, 6> Args;
		for (unsigned I = 0; I != E->getNumArgs(); ++I)
Args.push_back(EmitScalarExpr(E->getArg(I)));		Args.push_back(EmitScalarExpr(E->getArg(I)));
Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,		assert(Args.size() == 5 || Args.size() == 6);
Args[0]->getType());		if (Args.size() == 5)
		Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType()));
		Value *F =
		CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
return Builder.CreateCall(F, Args);		return Builder.CreateCall(F, Args);
}		}
case AMDGPU::BI__builtin_amdgcn_div_fixup:		case AMDGPU::BI__builtin_amdgcn_div_fixup:
case AMDGPU::BI__builtin_amdgcn_div_fixupf:		case AMDGPU::BI__builtin_amdgcn_div_fixupf:
case AMDGPU::BI__builtin_amdgcn_div_fixuph:		case AMDGPU::BI__builtin_amdgcn_div_fixuph:
return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);		return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
case AMDGPU::BI__builtin_amdgcn_trig_preop:		case AMDGPU::BI__builtin_amdgcn_trig_preop:
case AMDGPU::BI__builtin_amdgcn_trig_preopf:		case AMDGPU::BI__builtin_amdgcn_trig_preopf:
▲ Show 20 Lines • Show All 1,457 Lines • Show Last 20 Lines

test/CodeGenOpenCL/builtins-amdgcn-vi.cl

	Show First 20 Lines • Show All 84 Lines • ▼ Show 20 Lines
	// CHECK-LABEL: @test_s_dcache_wb()			// CHECK-LABEL: @test_s_dcache_wb()
	// CHECK: call void @llvm.amdgcn.s.dcache.wb()			// CHECK: call void @llvm.amdgcn.s.dcache.wb()
	void test_s_dcache_wb()			void test_s_dcache_wb()
	{			{
	__builtin_amdgcn_s_dcache_wb();			__builtin_amdgcn_s_dcache_wb();
	}			}

	// CHECK-LABEL: @test_mov_dpp			// CHECK-LABEL: @test_mov_dpp
	// CHECK: call i32 @llvm.amdgcn.mov.dpp.i32(i32 %src, i32 0, i32 0, i32 0, i1 false)			// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %src, i32 0, i32 0, i32 0, i1 false)
	void test_mov_dpp(global int* out, int src)			void test_mov_dpp(global int* out, int src)
	{			{
	*out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);			*out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
	}			}

				// CHECK-LABEL: @test_update_dpp
				// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
				void test_update_dpp(global int* out, int arg1, int arg2)
				{
				*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
				}

	// CHECK-LABEL: @test_ds_fadd			// CHECK-LABEL: @test_ds_fadd
	// CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)			// CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
	void test_ds_faddf(local float *out, float src) {			void test_ds_faddf(local float *out, float src) {
	*out = __builtin_amdgcn_ds_faddf(out, src, 0, 0, false);			*out = __builtin_amdgcn_ds_faddf(out, src, 0, 0, false);
	}			}

	// CHECK-LABEL: @test_ds_fmin			// CHECK-LABEL: @test_ds_fmin
	// CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)			// CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
	Show All 9 Lines

test/SemaOpenCL/builtins-amdgcn-error.cl

	Show First 20 Lines • Show All 96 Lines • ▼ Show 20 Lines
	void test_mov_dpp2(global int* out, int a, int b, int c, int d, bool e)			void test_mov_dpp2(global int* out, int a, int b, int c, int d, bool e)
	{			{
	*out = __builtin_amdgcn_mov_dpp(a, b, 0, 0, false); // expected-error {{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}			*out = __builtin_amdgcn_mov_dpp(a, b, 0, 0, false); // expected-error {{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
	*out = __builtin_amdgcn_mov_dpp(a, 0, c, 0, false); // expected-error {{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}			*out = __builtin_amdgcn_mov_dpp(a, 0, c, 0, false); // expected-error {{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
	*out = __builtin_amdgcn_mov_dpp(a, 0, 0, d, false); // expected-error {{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}			*out = __builtin_amdgcn_mov_dpp(a, 0, 0, d, false); // expected-error {{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
	*out = __builtin_amdgcn_mov_dpp(a, 0, 0, 0, e); // expected-error {{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}			*out = __builtin_amdgcn_mov_dpp(a, 0, 0, 0, e); // expected-error {{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
	}			}

				void test_update_dpp2(global int* out, int a, int b, int c, int d, int e, bool f)
				{
				*out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, false);
				*out = __builtin_amdgcn_update_dpp(a, 0, c, 0, 0, false); // expected-error {{argument to '__builtin_amdgcn_update_dpp' must be a constant integer}}
				*out = __builtin_amdgcn_update_dpp(a, 0, 0, d, 0, false); // expected-error {{argument to '__builtin_amdgcn_update_dpp' must be a constant integer}}
				*out = __builtin_amdgcn_update_dpp(a, 0, 0, 0, e, false); // expected-error {{argument to '__builtin_amdgcn_update_dpp' must be a constant integer}}
				*out = __builtin_amdgcn_update_dpp(a, 0, 0, 0, 0, f); // expected-error {{argument to '__builtin_amdgcn_update_dpp' must be a constant integer}}
				}

	void test_ds_faddf(local float *out, float src, int a) {			void test_ds_faddf(local float *out, float src, int a) {
	*out = __builtin_amdgcn_ds_faddf(out, src, a, 0, false); // expected-error {{argument to '__builtin_amdgcn_ds_faddf' must be a constant integer}}			*out = __builtin_amdgcn_ds_faddf(out, src, a, 0, false); // expected-error {{argument to '__builtin_amdgcn_ds_faddf' must be a constant integer}}
	*out = __builtin_amdgcn_ds_faddf(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_faddf' must be a constant integer}}			*out = __builtin_amdgcn_ds_faddf(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_faddf' must be a constant integer}}
	*out = __builtin_amdgcn_ds_faddf(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_faddf' must be a constant integer}}			*out = __builtin_amdgcn_ds_faddf(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_faddf' must be a constant integer}}
	}			}

	void test_ds_fminf(local float *out, float src, int a) {			void test_ds_fminf(local float *out, float src, int a) {
	*out = __builtin_amdgcn_ds_fminf(out, src, a, 0, false); // expected-error {{argument to '__builtin_amdgcn_ds_fminf' must be a constant integer}}			*out = __builtin_amdgcn_ds_fminf(out, src, a, 0, false); // expected-error {{argument to '__builtin_amdgcn_ds_fminf' must be a constant integer}}
	Show All 9 Lines