diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
--- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -51,6 +51,17 @@
   // Initialize class variables.
   void initialize(MachineFunction &MFParm);
 
+  // PHI instructions already walked by the current isMovFrom32Def() query.
+  // A fixed-size array needs no additional header; a query that runs out of
+  // slots conservatively reports "not a 32-bit def", which keeps the shifts.
+  static constexpr unsigned MaxVisitedPhis = 32;
+  MachineInstr *VisitedPhis[MaxVisitedPhis] = {};
+  unsigned NumVisitedPhis = 0;
+
+  bool markPhiVisited(MachineInstr *PhiMI);
+  bool isCopyFrom32Def(MachineInstr *CopyMI);
+  bool isInsnFrom32Def(MachineInstr *DefInsn);
+  bool isPhiFrom32Def(MachineInstr *PhiMI);
   bool isMovFrom32Def(MachineInstr *MovMI);
   bool eliminateZExtSeq(void);
 
@@ -75,42 +86,96 @@
   LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
 }
 
-bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
-{
-  MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
-
-  LLVM_DEBUG(dbgs() << " Def of Mov Src:");
-  LLVM_DEBUG(DefInsn->dump());
-
-  if (!DefInsn)
-    return false;
-
-  if (DefInsn->isPHI()) {
-    for (unsigned i = 1, e = DefInsn->getNumOperands(); i < e; i += 2) {
-      MachineOperand &opnd = DefInsn->getOperand(i);
-
-      if (!opnd.isReg())
-        return false;
-
-      MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
-      // quick check on PHI incoming definitions.
-      if (!PhiDef || PhiDef->isPHI() || PhiDef->getOpcode() == BPF::COPY)
-        return false;
-    }
-  }
-
-  if (DefInsn->getOpcode() == BPF::COPY) {
-    MachineOperand &opnd = DefInsn->getOperand(1);
-
-    if (!opnd.isReg())
-      return false;
-
-    Register Reg = opnd.getReg();
-    if ((Register::isVirtualRegister(Reg) &&
-         MRI->getRegClass(Reg) == &BPF::GPRRegClass))
-      return false;
-  }
+// Record \p PhiMI as visited by the current isMovFrom32Def() query.
+// Returns false if it was already visited (a PHI cycle such as a
+// loop-carried value, or a PHI reached along a second path) or if the
+// visit budget is exhausted; callers treat both as "not a 32-bit def".
+bool BPFMIPeephole::markPhiVisited(MachineInstr *PhiMI)
+{
+  for (unsigned i = 0; i < NumVisitedPhis; ++i)
+    if (VisitedPhis[i] == PhiMI)
+      return false;
+
+  if (NumVisitedPhis == MaxVisitedPhis)
+    return false;
+
+  VisitedPhis[NumVisitedPhis++] = PhiMI;
+  return true;
+}
+
+// Return true if the source of \p CopyMI is a 32-bit virtual register whose
+// definition passes isInsnFrom32Def().
+bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)
+{
+  MachineOperand &opnd = CopyMI->getOperand(1);
+
+  if (!opnd.isReg())
+    return false;
+
+  // Return false if getting value from a 32bit physical register.
+  // Most likely, this physical register is aliased to
+  // function call return value or current function parameters.
+  Register Reg = opnd.getReg();
+  if (!Register::isVirtualRegister(Reg))
+    return false;
+
+  if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
+    return false;
+
+  return isInsnFrom32Def(MRI->getVRegDef(Reg));
+}
+
+// Return true if every incoming value of \p PhiMI is a register whose
+// definition passes isInsnFrom32Def(). Operands are (value, block) pairs
+// starting at index 1, hence the stride of 2.
+bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
+{
+  for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
+    MachineOperand &opnd = PhiMI->getOperand(i);
+
+    if (!opnd.isReg())
+      return false;
+
+    if (!isInsnFrom32Def(MRI->getVRegDef(opnd.getReg())))
+      return false;
+  }
+
+  return true;
+}
+
+// The \p DefInsn instruction defines a virtual register.
+bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
+{
+  if (!DefInsn)
+    return false;
+
+  // A PHI that was already visited must not be walked again: PHIs can form
+  // cycles, and re-entering one would recurse without bound.
+  if (DefInsn->isPHI())
+    return markPhiVisited(DefInsn) && isPhiFrom32Def(DefInsn);
+
+  if (DefInsn->getOpcode() == BPF::COPY)
+    return isCopyFrom32Def(DefInsn);
+
+  return true;
+}
+
+bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
+{
+  MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
+
+  // Bail out before the debug dump below, which dereferences DefInsn.
+  if (!DefInsn)
+    return false;
+
+  LLVM_DEBUG(dbgs() << " Def of Mov Src:");
+  LLVM_DEBUG(DefInsn->dump());
+
+  // Each query starts with an empty visited-PHI list.
+  NumVisitedPhis = 0;
+  if (!isInsnFrom32Def(DefInsn))
+    return false;
 
   LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n");
 
   return true;
diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll
--- a/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll
+++ b/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll
@@ -55,6 +55,9 @@
   %c.d = select i1 %cmp, i32 %c, i32 %d
   ret i32 %c.d
 }
+; CHECK-LABEL: select_cc_32
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
 
 ; Function Attrs: norecurse nounwind readnone
 define dso_local i64 @select_cc_32_64(i32 %a, i32 %b, i64 %c, i64 %d) local_unnamed_addr #0 {
@@ -63,6 +66,9 @@
   %c.d = select i1 %cmp, i64 %c, i64 %d
   ret i64 %c.d
 }
+; CHECK-LABEL: select_cc_32_64
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
 
 ; Function Attrs: norecurse nounwind readnone
 define dso_local i32 @select_cc_64_32(i64 %a, i64 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
@@ -71,6 +77,8 @@
   %c.d = select i1 %cmp, i32 %c, i32 %d
   ret i32 %c.d
 }
+; CHECK-LABEL: select_cc_64_32
+; CHECK-NOT: r{{[0-9]+}} <<= 32
 
 ; Function Attrs: norecurse nounwind readnone
 define dso_local i32 @selecti_cc_32(i32 %a, i32 %c, i32 %d) local_unnamed_addr #0 {
@@ -79,6 +87,9 @@
   %c.d = select i1 %cmp, i32 %c, i32 %d
   ret i32 %c.d
 }
+; CHECK-LABEL: selecti_cc_32
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
 
 ; Function Attrs: norecurse nounwind readnone
 define dso_local i64 @selecti_cc_32_64(i32 %a, i64 %c, i64 %d) local_unnamed_addr #0 {
@@ -87,6 +98,9 @@
   %c.d = select i1 %cmp, i64 %c, i64 %d
   ret i64 %c.d
 }
+; CHECK-LABEL: selecti_cc_32_64
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
 
 ; Function Attrs: norecurse nounwind readnone
 define dso_local i32 @selecti_cc_64_32(i64 %a, i32 %c, i32 %d) local_unnamed_addr #0 {
@@ -95,6 +109,5 @@
   %c.d = select i1 %cmp, i32 %c, i32 %d
   ret i32 %c.d
 }
-; There shouldn't be any type promotion, all of them are expected to be
-; eliminated by peephole optimization.
+; CHECK-LABEL: selecti_cc_64_32
 ; CHECK-NOT: r{{[0-9]+}} <<= 32
diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll
@@ -0,0 +1,34 @@
+; RUN: llc -O2 -march=bpfel -mcpu=v2 -mattr=+alu32 < %s | FileCheck %s
+;
+; For the below test case, 'b' in 'ret == b' needs SLL/SRL.
+; 'ret' in 'ret == b' does not need SLL/SRL as all 'ret' values
+; are assigned through 'w = ' alu32 operations.
+;
+; extern int helper(int);
+; int test(int a, int b, int c, int d) {
+;   int ret;
+;   if (a < b)
+;     ret = (c < d) ? -1 : 0;
+;   else
+;     ret = (c < a) ? 1 : 2;
+;   return helper(ret == b);
+; }
+
+define dso_local i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr {
+entry:
+  %cmp = icmp slt i32 %a, %b
+  %cmp1 = icmp slt i32 %c, %d
+  %cond = sext i1 %cmp1 to i32
+  %cmp2 = icmp slt i32 %c, %a
+  %cond3 = select i1 %cmp2, i32 1, i32 2
+  %ret.0 = select i1 %cmp, i32 %cond, i32 %cond3
+  %cmp4 = icmp eq i32 %ret.0, %b
+  %conv = zext i1 %cmp4 to i32
+  %call = tail call i32 @helper(i32 %conv)
+  ret i32 %call
+}
+; CHECK: r{{[0-9]+}} >>= 32
+; CHECK-NOT: r{{[0-9]+}} >>= 32
+; CHECK: if r{{[0-9]+}} == r{{[0-9]+}} goto
+
+declare dso_local i32 @helper(i32) local_unnamed_addr
diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll
@@ -0,0 +1,34 @@
+; RUN: llc -O2 -march=bpfel -mcpu=v2 -mattr=+alu32 < %s | FileCheck %s
+;
+; For the below test case, both 'ret' and 'b' at 'ret == b'
+; need SLL/SRL. For 'ret', 'ret = a' may receive the value
+; from argument with high 32-bit invalid data.
+;
+; extern int helper(int);
+; int test(int a, int b, int c, int d) {
+;   int ret;
+;   if (a < b)
+;     ret = (c < d) ? a : 0;
+;   else
+;     ret = (c < a) ? 1 : 2;
+;   return helper(ret == b);
+; }
+
+define dso_local i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr {
+entry:
+  %cmp = icmp slt i32 %a, %b
+  %cmp1 = icmp slt i32 %c, %d
+  %cond = select i1 %cmp1, i32 %a, i32 0
+  %cmp2 = icmp slt i32 %c, %a
+  %cond3 = select i1 %cmp2, i32 1, i32 2
+  %ret.0 = select i1 %cmp, i32 %cond, i32 %cond3
+  %cmp4 = icmp eq i32 %ret.0, %b
+  %conv = zext i1 %cmp4 to i32
+  %call = tail call i32 @helper(i32 %conv)
+  ret i32 %call
+}
+; CHECK: r{{[0-9]+}} >>= 32
+; CHECK: r{{[0-9]+}} >>= 32
+; CHECK: if r{{[0-9]+}} == r{{[0-9]+}} goto
+
+declare dso_local i32 @helper(i32) local_unnamed_addr
diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll
--- a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll
+++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll
@@ -47,8 +47,8 @@
 entry:
   %cmp = icmp ugt i32 %a, %b
   %c.d = select i1 %cmp, i64 %c, i64 %d
-; CHECK-NOT: r{{[0-9]+}} <<= 32
-; CHECK-NOT: r{{[0-9]+}} >>= 32
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
 ; CHECK: if r{{[0-9]+}} {{<|>}} r{{[0-9]+}} goto
   ret i64 %c.d
 }
@@ -58,8 +58,8 @@
 ; CHECK-LABEL: select_u_2:
 entry:
   %conv = zext i32 %a to i64
-; CHECK-NOT: r{{[0-9]+}} <<= 32
-; CHECK-NOT: r{{[0-9]+}} >>= 32
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
   %cmp = icmp ugt i64 %conv, %b
   %c.d = select i1 %cmp, i64 %c, i64 %d
   ret i64 %c.d
@@ -100,8 +100,23 @@
 ; CHECK-LABEL: inc_p:
 entry:
   %idx.ext = zext i32 %a to i64
-; CHECK-NOT: r{{[0-9]+}} <<= 32
-; CHECK-NOT: r{{[0-9]+}} >>= 32
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
   %add.ptr = getelementptr inbounds i32, i32* %p, i64 %idx.ext
   ret i32* %add.ptr
 }
+
+define dso_local i32 @test() local_unnamed_addr {
+; CHECK-LABEL: test:
+entry:
+  %call = tail call i32 bitcast (i32 (...)* @helper to i32 ()*)()
+  %cmp = icmp sgt i32 %call, 6
+; The shifts can't be optimized out because %call comes from a function call
+; returning i32, so the high bits might be invalid.
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} s>>= 32
+  %cond = zext i1 %cmp to i32
+; CHECK: if r{{[0-9]+}} s{{<|>}} {{[0-9]+}} goto
+  ret i32 %cond
+}
+declare dso_local i32 @helper(...) local_unnamed_addr