This is an archive of the discontinued LLVM Phabricator instance.

Differential D20477

[x86, AVX] don't add a vzeroupper if that's what the code is already doing (PR27823)
ClosedPublic

Authored by spatel on May 20 2016, 10:42 AM.

Download Raw Diff

Details

Reviewers

RKSimon
ygao
aaboud

Commits

rG2959ff4a88da: [x86, AVX] don't add a vzeroupper if that's what the code is already doing…
rL270378: [x86, AVX] don't add a vzeroupper if that's what the code is already doing…

Summary

This isn't the complete fix, but it handles the trivial examples of duplicate vzero* ops in PR27823:
https://llvm.org/bugs/show_bug.cgi?id=27823
...and amusingly, the bogus cases already exist as regression tests, so let's take this baby step.

We'll need to do more in the general case where there's legitimate AVX usage in the function + there's already a vzero in the code.

Diff Detail

Repository: rL LLVM

Event Timeline

spatel updated this revision to Diff 57953. · May 20 2016, 10:42 AM

spatel retitled this revision from to [x86, AVX] don't add a vzeroupper if that's what the code is already doing (PR27823).

spatel updated this object.

spatel added reviewers: RKSimon, aaboud, ygao.

spatel added a subscriber: llvm-commits.

Herald added a subscriber: mcrosier. · View Herald Transcript · May 20 2016, 10:42 AM

Can we add tests for explicit calls to vzeroall/vzeroupper that is half way through the function with ymm accesses before + after?

LGTM.
+1 for adding the test Simon mentioned.

This revision is now accepted and ready to land. · May 22 2016, 3:47 AM

In D20477#436382, @RKSimon wrote:

Can we add tests for explicit calls to vzeroall/vzeroupper that is half way through the function with ymm accesses before + after?

Thanks, Simon and Amjad.
I'm not sure of all of the cases that we need to handle, but I've started a file for that purpose with:
rL270375
That's a simplification of the example I noted in:
https://llvm.org/bugs/show_bug.cgi?id=27823#c3

I'll add a FIXME comment to this patch, so we know that this is not the complete solution.

Closed by commit rL270378: [x86, AVX] don't add a vzeroupper if that's what the code is already doing… (authored by spatel). · Explain Why · May 22 2016, 1:29 PM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

trunk/

lib/

Target/

X86/

X86VZeroUpper.cpp

6 lines

test/

CodeGen/

X86/

avx-intrinsics-fast-isel.ll

4 lines

avx-intrinsics-x86.ll

2 lines

Diff 58065

llvm/trunk/lib/Target/X86/X86VZeroUpper.cpp

Show First 20 Lines • Show All 186 Lines • ▼ Show 20 Lines	for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
// epilogue will restore YMM registers if needed.		// epilogue will restore YMM registers if needed.
bool IsReturnFromX86INTR = IsX86INTR && MI->isReturn();		bool IsReturnFromX86INTR = IsX86INTR && MI->isReturn();
bool IsControlFlow = MI->isCall() || MI->isReturn();		bool IsControlFlow = MI->isCall() || MI->isReturn();

// Shortcut: don't need to check regular instructions in dirty state.		// Shortcut: don't need to check regular instructions in dirty state.
if ((!IsControlFlow || IsReturnFromX86INTR) && CurState == EXITS_DIRTY)		if ((!IsControlFlow || IsReturnFromX86INTR) && CurState == EXITS_DIRTY)
continue;		continue;

		// Ignore existing VZERO* instructions.
		// FIXME: The existence of these instructions should be used to modify the
		// current state and/or used when deciding whether we need to create a VZU.
		if (MI->getOpcode() == X86::VZEROALL || MI->getOpcode() == X86::VZEROUPPER)
		continue;

if (hasYmmReg(MI)) {		if (hasYmmReg(MI)) {
// We found a ymm-using instruction; this could be an AVX instruction,		// We found a ymm-using instruction; this could be an AVX instruction,
// or it could be control flow.		// or it could be control flow.
CurState = EXITS_DIRTY;		CurState = EXITS_DIRTY;
continue;		continue;
}		}

// Check for control-flow out of the current function (which might		// Check for control-flow out of the current function (which might
▲ Show 20 Lines • Show All 123 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll

Show First 20 Lines • Show All 3,730 Lines • ▼ Show 20 Lines	; X64-NEXT: retq
%bc = bitcast <8 x i32> %res to <8 x float>		%bc = bitcast <8 x i32> %res to <8 x float>
ret <8 x float> %bc		ret <8 x float> %bc
}		}

define void @test_mm256_zeroall() nounwind {		define void @test_mm256_zeroall() nounwind {
; X32-LABEL: test_mm256_zeroall:		; X32-LABEL: test_mm256_zeroall:
; X32: # BB#0:		; X32: # BB#0:
; X32-NEXT: vzeroall		; X32-NEXT: vzeroall
; X32-NEXT: vzeroupper
; X32-NEXT: retl		; X32-NEXT: retl
;		;
; X64-LABEL: test_mm256_zeroall:		; X64-LABEL: test_mm256_zeroall:
; X64: # BB#0:		; X64: # BB#0:
; X64-NEXT: vzeroall		; X64-NEXT: vzeroall
; X64-NEXT: vzeroupper
; X64-NEXT: retq		; X64-NEXT: retq
call void @llvm.x86.avx.vzeroall()		call void @llvm.x86.avx.vzeroall()
ret void		ret void
}		}
declare void @llvm.x86.avx.vzeroall() nounwind readnone		declare void @llvm.x86.avx.vzeroall() nounwind readnone

define void @test_mm256_zeroupper() nounwind {		define void @test_mm256_zeroupper() nounwind {
; X32-LABEL: test_mm256_zeroupper:		; X32-LABEL: test_mm256_zeroupper:
; X32: # BB#0:		; X32: # BB#0:
; X32-NEXT: vzeroupper		; X32-NEXT: vzeroupper
; X32-NEXT: vzeroupper
; X32-NEXT: retl		; X32-NEXT: retl
;		;
; X64-LABEL: test_mm256_zeroupper:		; X64-LABEL: test_mm256_zeroupper:
; X64: # BB#0:		; X64: # BB#0:
; X64-NEXT: vzeroupper		; X64-NEXT: vzeroupper
; X64-NEXT: vzeroupper
; X64-NEXT: retq		; X64-NEXT: retq
call void @llvm.x86.avx.vzeroupper()		call void @llvm.x86.avx.vzeroupper()
ret void		ret void
}		}
declare void @llvm.x86.avx.vzeroupper() nounwind readnone		declare void @llvm.x86.avx.vzeroupper() nounwind readnone

!0 = !{i32 1}		!0 = !{i32 1}

llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll

	Show First 20 Lines • Show All 4,749 Lines • ▼ Show 20 Lines
	}			}
	declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone			declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone


	define void @test_x86_avx_vzeroall() {			define void @test_x86_avx_vzeroall() {
	; AVX-LABEL: test_x86_avx_vzeroall:			; AVX-LABEL: test_x86_avx_vzeroall:
	; AVX: ## BB#0:			; AVX: ## BB#0:
	; AVX-NEXT: vzeroall			; AVX-NEXT: vzeroall
	; AVX-NEXT: vzeroupper
	; AVX-NEXT: retl			; AVX-NEXT: retl
	;			;
	; AVX512VL-LABEL: test_x86_avx_vzeroall:			; AVX512VL-LABEL: test_x86_avx_vzeroall:
	; AVX512VL: ## BB#0:			; AVX512VL: ## BB#0:
	; AVX512VL-NEXT: vzeroall			; AVX512VL-NEXT: vzeroall
	; AVX512VL-NEXT: retl			; AVX512VL-NEXT: retl
	call void @llvm.x86.avx.vzeroall()			call void @llvm.x86.avx.vzeroall()
	ret void			ret void
	}			}
	declare void @llvm.x86.avx.vzeroall() nounwind			declare void @llvm.x86.avx.vzeroall() nounwind


	define void @test_x86_avx_vzeroupper() {			define void @test_x86_avx_vzeroupper() {
	; AVX-LABEL: test_x86_avx_vzeroupper:			; AVX-LABEL: test_x86_avx_vzeroupper:
	; AVX: ## BB#0:			; AVX: ## BB#0:
	; AVX-NEXT: vzeroupper			; AVX-NEXT: vzeroupper
	; AVX-NEXT: vzeroupper
	; AVX-NEXT: retl			; AVX-NEXT: retl
	;			;
	; AVX512VL-LABEL: test_x86_avx_vzeroupper:			; AVX512VL-LABEL: test_x86_avx_vzeroupper:
	; AVX512VL: ## BB#0:			; AVX512VL: ## BB#0:
	; AVX512VL-NEXT: vzeroupper			; AVX512VL-NEXT: vzeroupper
	; AVX512VL-NEXT: retl			; AVX512VL-NEXT: retl
	call void @llvm.x86.avx.vzeroupper()			call void @llvm.x86.avx.vzeroupper()
	ret void			ret void
	▲ Show 20 Lines • Show All 240 Lines • Show Last 20 Lines