This is an archive of the discontinued LLVM Phabricator instance.

In D20836#444911, @tra wrote:

I guess we would not be able to remove convergent from inline asm automatically. Do we need a way to explicitly remove convergent from inline asm?

We can think about it. I'm not sure it will make a big difference, frankly. Like, if this encourages people to write less inline asm, I'm on board with that. :)

Revision Contents

Path

Size

cfe/

trunk/

lib/

CodeGen/

CGStmt.cpp

8 lines

test/

CodeGenCUDA/

convergent.cu

6 lines

Diff 59133

cfe/trunk/lib/CodeGen/CGStmt.cpp

Show First 20 Lines • Show All 2,048 Lines • ▼ Show 20 Lines	void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
} else {		} else {
// At least put the line number on MS inline asm blobs.		// At least put the line number on MS inline asm blobs.
auto Loc = llvm::ConstantInt::get(Int32Ty, S.getAsmLoc().getRawEncoding());		auto Loc = llvm::ConstantInt::get(Int32Ty, S.getAsmLoc().getRawEncoding());
Result->setMetadata("srcloc",		Result->setMetadata("srcloc",
llvm::MDNode::get(getLLVMContext(),		llvm::MDNode::get(getLLVMContext(),
llvm::ConstantAsMetadata::get(Loc)));		llvm::ConstantAsMetadata::get(Loc)));
}		}

		if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
		// Conservatively, mark all inline asm blocks in CUDA as convergent
		// (meaning, they may call an intrinsically convergent op, such as bar.sync,
		// and so can't have certain optimizations applied around them).
		Result->addAttribute(llvm::AttributeSet::FunctionIndex,
		llvm::Attribute::Convergent);
		}

// Extract all of the register value results from the asm.		// Extract all of the register value results from the asm.
std::vector<llvm::Value*> RegResults;		std::vector<llvm::Value*> RegResults;
if (ResultRegTypes.size() == 1) {		if (ResultRegTypes.size() == 1) {
RegResults.push_back(Result);		RegResults.push_back(Result);
} else {		} else {
for (unsigned i = 0, e = ResultRegTypes.size(); i != e; ++i) {		for (unsigned i = 0, e = ResultRegTypes.size(); i != e; ++i) {
llvm::Value *Tmp = Builder.CreateExtractValue(Result, i, "asmresult");		llvm::Value *Tmp = Builder.CreateExtractValue(Result, i, "asmresult");
RegResults.push_back(Tmp);		RegResults.push_back(Tmp);
▲ Show 20 Lines • Show All 147 Lines • Show Last 20 Lines

cfe/trunk/test/CodeGenCUDA/convergent.cu

	Show All 19 Lines
	// HOST-NEXT: define void @_Z3barv			// HOST-NEXT: define void @_Z3barv
	// DEVICE: Function Attrs:			// DEVICE: Function Attrs:
	// DEVICE-SAME: convergent			// DEVICE-SAME: convergent
	// DEVICE-NEXT: define void @_Z3barv			// DEVICE-NEXT: define void @_Z3barv
	__host__ __device__ void baz();			__host__ __device__ void baz();
	__host__ __device__ void bar() {			__host__ __device__ void bar() {
	// DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]]			// DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]]
	baz();			baz();
				// DEVICE: call i32 asm "trap;", "=l"() [[ASM_ATTR:#[0-9]+]]
				int x;
				asm ("trap;" : "=l"(x));
				// DEVICE: call void asm sideeffect "trap;", ""() [[ASM_ATTR:#[0-9]+]]
				asm volatile ("trap;");
	}			}

	// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]			// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
	// DEVICE: attributes [[BAZ_ATTR]] = {			// DEVICE: attributes [[BAZ_ATTR]] = {
	// DEVICE-SAME: convergent			// DEVICE-SAME: convergent
	// DEVICE-SAME: }			// DEVICE-SAME: }
	// DEVICE: attributes [[CALL_ATTR]] = { convergent }			// DEVICE: attributes [[CALL_ATTR]] = { convergent }
				// DEVICE: attributes [[ASM_ATTR]] = { convergent

	// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]			// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
	// HOST: attributes [[BAZ_ATTR]] = {			// HOST: attributes [[BAZ_ATTR]] = {
	// HOST-NOT: convergent			// HOST-NOT: convergent
	// NOST-SAME: }			// NOST-SAME: }

This is an archive of the discontinued LLVM Phabricator instance.

[CUDA] Conservatively mark inline asm as convergent. (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 59133

cfe/trunk/lib/CodeGen/CGStmt.cpp

cfe/trunk/test/CodeGenCUDA/convergent.cu

[CUDA] Conservatively mark inline asm as convergent.
ClosedPublic