This is an archive of the discontinued LLVM Phabricator instance.

[BPF] Remove unnecessary MOV_32_64 instructions
ClosedPublic

Authored by yonghong-song on Jun 2 2020, 6:39 PM.

Download Raw Diff

Details

Reviewers

ast
anakryiko

Commits

rG3659559cf39b: [BPF] Remove unnecessary MOV_32_64 instructions

Summary

Commit 13f6c81c5d9a ("[BPF] simplify zero extension
with MOV_32_64") tried to use MOV_32_64 instructions
instead of lshift/rshift instructions for zero extension.
This has the benefit to remove the number of instructions
and may help verifier too.

But the same commit also removed the old MOV_32_64
pruning as it deems unsafe as MOV_32_64 does have the
side effect, zeroing out the top 32bit in the register.
This caused the following failure in kernel selftest
test_cls_redirect.o. In linux kernel, we have

struct __sk_buff {
   __u32 data;
   __u32 data_end;
};

The compiler will generate 32bit load for sk_buff->data
and sk_buff->data_end. But kernel verifier will actually
loads an address (64bit address on 64bit kernel) to the
result register. In this particular example, the explicit zext
was not optimized away and destroyed top 32bit
address and the verifier rejected the program :

w2 = *(u32 *)(r1 + 76) 
... 
r2 = w2  /* MOV_32_64: this will clear top 32bit */

Currently, if the load and the zext are next to each other, the
instruction pattern match can actually capture this to
avoid MOV_32_64, e.g., in BPFInstrInfo.td, we have

def : Pat<(i64 (zextloadi32 ADDRri:$src)),
          (SUBREG_TO_REG (i64 0), (LDW32 ADDRri:$src), sub_32)>;

However, if they are not next to each other, LDW32 and
MOV_32_64 are generated, which may cause the above mentioned
problem.

BPF Backend already tried to optimize away pattern

mov_32_64 + lshift + rshift

Commit 13f6c81c5d9a may generate mov_32_64 not followed by shifts.
This patch added optimization for mov_32_64 too.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

yonghong-song created this revision.Jun 2 2020, 6:39 PM

Herald added a project: Restricted Project. · View Herald TranscriptJun 2 2020, 6:39 PM

Herald added subscribers: llvm-commits, hiraditya. · View Herald Transcript

Harbormaster failed remote builds in B58841: Diff 268033!Jun 2 2020, 7:12 PM

ast accepted this revision.Jun 2 2020, 7:43 PM

This revision is now accepted and ready to land.Jun 2 2020, 7:43 PM

Closed by commit rG3659559cf39b: [BPF] Remove unnecessary MOV_32_64 instructions (authored by yonghong-song). · Explain WhyJun 3 2020, 8:15 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

lib/

Target/

BPF/

BPFMIPeephole.cpp

53 lines

test/

CodeGen/

BPF/

remove_truncate_7.ll

55 lines

Diff 268212

llvm/lib/Target/BPF/BPFMIPeephole.cpp

Show First 20 Lines • Show All 51 Lines • ▼ Show 20 Lines	private:
// Initialize class variables.		// Initialize class variables.
void initialize(MachineFunction &MFParm);		void initialize(MachineFunction &MFParm);

bool isCopyFrom32Def(MachineInstr *CopyMI);		bool isCopyFrom32Def(MachineInstr *CopyMI);
bool isInsnFrom32Def(MachineInstr *DefInsn);		bool isInsnFrom32Def(MachineInstr *DefInsn);
bool isPhiFrom32Def(MachineInstr *MovMI);		bool isPhiFrom32Def(MachineInstr *MovMI);
bool isMovFrom32Def(MachineInstr *MovMI);		bool isMovFrom32Def(MachineInstr *MovMI);
bool eliminateZExtSeq(void);		bool eliminateZExtSeq(void);
		bool eliminateZExt(void);

std::set<MachineInstr *> PhiInsns;		std::set<MachineInstr *> PhiInsns;

public:		public:

// Main entry point for this pass.		// Main entry point for this pass.
bool runOnMachineFunction(MachineFunction &MF) override {		bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(MF.getFunction()))		if (skipFunction(MF.getFunction()))
return false;		return false;

initialize(MF);		initialize(MF);

return eliminateZExtSeq();		// First try to eliminate (zext, lshift, rshift) and then
		// try to eliminate zext.
		bool ZExtSeqExist, ZExtExist;
		ZExtSeqExist = eliminateZExtSeq();
		ZExtExist = eliminateZExt();
		return ZExtSeqExist \|\| ZExtExist;
}		}
};		};

// Initialize class variables.		// Initialize class variables.
void BPFMIPeephole::initialize(MachineFunction &MFParm) {		void BPFMIPeephole::initialize(MachineFunction &MFParm) {
MF = &MFParm;		MF = &MFParm;
MRI = &MF->getRegInfo();		MRI = &MF->getRegInfo();
TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();		TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
▲ Show 20 Lines • Show All 148 Lines • ▼ Show 20 Lines	for (MachineInstr &MI : MBB) {
Eliminated = true;		Eliminated = true;
}		}
}		}
}		}

return Eliminated;		return Eliminated;
}		}

		bool BPFMIPeephole::eliminateZExt(void) {
		MachineInstr* ToErase = nullptr;
		bool Eliminated = false;

		for (MachineBasicBlock &MBB : *MF) {
		for (MachineInstr &MI : MBB) {
		// If the previous instruction was marked for elimination, remove it now.
		if (ToErase) {
		ToErase->eraseFromParent();
		ToErase = nullptr;
		}

		if (MI.getOpcode() != BPF::MOV_32_64)
		continue;

		// Eliminate MOV_32_64 if possible.
		// MOV_32_64 rA, wB
		//
		// If wB has been zero extended, replace it with a SUBREG_TO_REG.
		// This is to workaround BPF programs where pkt->{data, data_end}
		// is encoded as u32, but actually the verifier populates them
		// as 64bit pointer. The MOV_32_64 will zero out the top 32 bits.
		LLVM_DEBUG(dbgs() << "Candidate MOV_32_64 instruction:");
		LLVM_DEBUG(MI.dump());

		if (!isMovFrom32Def(&MI))
		continue;

		LLVM_DEBUG(dbgs() << "Removing the MOV_32_64 instruction\n");

		Register dst = MI.getOperand(0).getReg();
		Register src = MI.getOperand(1).getReg();

		// Build a SUBREG_TO_REG instruction.
		BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), dst)
		.addImm(0).addReg(src).addImm(BPF::sub_32);

		ToErase = &MI;
		Eliminated = true;
		}
		}

		return Eliminated;
		}

} // end default namespace		} // end default namespace

INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,		INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
"BPF MachineSSA Peephole Optimization For ZEXT Eliminate",		"BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
false, false)		false, false)

char BPFMIPeephole::ID = 0;		char BPFMIPeephole::ID = 0;
FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }		FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }
▲ Show 20 Lines • Show All 266 Lines • Show Last 20 Lines

llvm/test/CodeGen/BPF/remove_truncate_7.ll

This file was added.

				; RUN: llc < %s -march=bpf -mattr=+alu32 -verify-machineinstrs \| FileCheck %s
				;
				; Source:
				; struct __sk_buff {
				; unsigned data;
				; unsigned data_end;
				; };
				;
				; void * test(int flag, struct __sk_buff *skb)
				; {
				; void *p;
				;
				; if (flag) {
				; p = (void *)(long)skb->data;
				; __asm__ __volatile__("": : :"memory");
				; } else {
				; p = (void *)(long)skb->data_end;
				; __asm__ __volatile__("": : :"memory");
				; }
				;
				; return p;
				; }
				; Compilation flag:
				; clang -target bpf -O2 -S -emit-llvm t.c

				%struct.__sk_buff = type { i32, i32 }

				define dso_local i8* @test(i32 %flag, %struct.__sk_buff* nocapture readonly %skb) local_unnamed_addr {
				entry:
				%tobool = icmp eq i32 %flag, 0
				br i1 %tobool, label %if.else, label %if.then

				if.then:
				%data = getelementptr inbounds %struct.__sk_buff, %struct.__sk_buff* %skb, i64 0, i32 0
				%0 = load i32, i32* %data, align 4
				tail call void asm sideeffect "", "~{memory}"()
				br label %if.end

				if.else:
				%data_end = getelementptr inbounds %struct.__sk_buff, %struct.__sk_buff* %skb, i64 0, i32 1
				%1 = load i32, i32* %data_end, align 4
				tail call void asm sideeffect "", "~{memory}"()
				br label %if.end

				if.end:
				%p.0.in.in = phi i32 [ %0, %if.then ], [ %1, %if.else ]
				%p.0.in = zext i32 %p.0.in.in to i64
				%p.0 = inttoptr i64 %p.0.in to i8*
				ret i8* %p.0
				}

				; CHECK: w0 = (u32 )(r2 + 0)
				; CHECK: w0 = (u32 )(r2 + 4)
				; CHECK-NOT: r[[#]] = w[[#]]
				; CHECK: exit