diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -604,6 +604,12 @@ DebugLoc DL = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); + + if (!isSigned) { + Register PromotedReg0 = RegInfo.createVirtualRegister(RC); + BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg); + return PromotedReg0; + } Register PromotedReg0 = RegInfo.createVirtualRegister(RC); Register PromotedReg1 = RegInfo.createVirtualRegister(RC); Register PromotedReg2 = RegInfo.createVirtualRegister(RC); diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -732,8 +732,7 @@ def : Pat<(i64 (sext GPR32:$src)), (SRA_ri (SLL_ri (MOV_32_64 GPR32:$src), 32), 32)>; -def : Pat<(i64 (zext GPR32:$src)), - (SRL_ri (SLL_ri (MOV_32_64 GPR32:$src), 32), 32)>; +def : Pat<(i64 (zext GPR32:$src)), (MOV_32_64 GPR32:$src)>; // For i64 -> i32 truncation, use the 32-bit subregister directly. def : Pat<(i32 (trunc GPR:$src)), diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp --- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp +++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp @@ -301,19 +301,16 @@ // // MOV rA, rA // - // This is particularly possible to happen when sub-register support - // enabled. The special type cast insn MOV_32_64 involves different - // register class on src (i32) and dst (i64), RA could generate useless - // instruction due to this. + // Note that we cannot remove + // MOV_32_64 rA, wA + // MOV_rr_32 wA, wA + // as these two instructions have side effects, zeroing out + // top 32 bits of rA.
unsigned Opcode = MI.getOpcode(); - if (Opcode == BPF::MOV_32_64 || - Opcode == BPF::MOV_rr || Opcode == BPF::MOV_rr_32) { + if (Opcode == BPF::MOV_rr) { Register dst = MI.getOperand(0).getReg(); Register src = MI.getOperand(1).getReg(); - if (Opcode == BPF::MOV_32_64) - dst = TRI->getSubReg(dst, BPF::sub_32); - if (dst != src) continue; diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll --- a/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll +++ b/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll @@ -56,8 +56,9 @@ ret i32 %c.d } ; CHECK-LABEL: select_cc_32 -; CHECK: r{{[0-9]+}} <<= 32 -; CHECK-NEXT: r{{[0-9]+}} >>= 32 +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK-NOT: r{{[0-9]+}} <<= 32 +; CHECK-NOT: r{{[0-9]+}} >>= 32 ; Function Attrs: norecurse nounwind readnone define dso_local i64 @select_cc_32_64(i32 %a, i32 %b, i64 %c, i64 %d) local_unnamed_addr #0 { @@ -67,8 +68,9 @@ ret i64 %c.d } ; CHECK-LABEL: select_cc_32_64 -; CHECK: r{{[0-9]+}} <<= 32 -; CHECK-NEXT: r{{[0-9]+}} >>= 32 +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK-NOT: r{{[0-9]+}} <<= 32 +; CHECK-NOT: r{{[0-9]+}} >>= 32 ; Function Attrs: norecurse nounwind readnone define dso_local i32 @select_cc_64_32(i64 %a, i64 %b, i32 %c, i32 %d) local_unnamed_addr #0 { @@ -88,8 +90,9 @@ ret i32 %c.d } ; CHECK-LABEL: selecti_cc_32 -; CHECK: r{{[0-9]+}} <<= 32 -; CHECK-NEXT: r{{[0-9]+}} >>= 32 +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK-NOT: r{{[0-9]+}} <<= 32 +; CHECK-NOT: r{{[0-9]+}} >>= 32 ; Function Attrs: norecurse nounwind readnone define dso_local i64 @selecti_cc_32_64(i32 %a, i64 %c, i64 %d) local_unnamed_addr #0 { @@ -99,8 +102,9 @@ ret i64 %c.d } ; CHECK-LABEL: selecti_cc_32_64 -; CHECK: r{{[0-9]+}} <<= 32 -; CHECK-NEXT: r{{[0-9]+}} >>= 32 +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK-NOT: r{{[0-9]+}} <<= 32 +; CHECK-NOT: r{{[0-9]+}} >>= 32 ; Function Attrs: norecurse nounwind readnone define dso_local i32 @selecti_cc_64_32(i64 %a, i32 %c, 
i32 %d) local_unnamed_addr #0 { diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll --- a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll +++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll @@ -27,7 +27,7 @@ %call = tail call i32 @helper(i32 %conv) ret i32 %call } -; CHECK: r{{[0-9]+}} >>= 32 +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} ; CHECK-NOT: r{{[0-9]+}} >>= 32 ; CHECK: if r{{[0-9]+}} == r{{[0-9]+}} goto diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll --- a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll +++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll @@ -27,8 +27,8 @@ %call = tail call i32 @helper(i32 %conv) ret i32 %call } -; CHECK: r{{[0-9]+}} >>= 32 -; CHECK: r{{[0-9]+}} >>= 32 +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} ; CHECK: if r{{[0-9]+}} == r{{[0-9]+}} goto declare dso_local i32 @helper(i32) local_unnamed_addr diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-3.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-3.ll --- a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-3.ll +++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-3.ll @@ -44,8 +44,9 @@ %exitcond = icmp eq i64 %inc, 100 br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !2 } -; CHECK: [[VAL:r[0-9]+]] <<= 32 -; CHECK: [[VAL]] >>= 32 +; CHECK: [[VAL:r[0-9]+]] = w{{[0-9]+}} +; CHECK-NOT: [[VAL:r[0-9]+]] <<= 32 +; CHECK-NOT: [[VAL]] >>= 32 ; CHECK: if [[VAL]] == 0 goto !2 = distinct !{!2, !3} diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll --- a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll +++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll @@ -47,8 +47,9 @@ entry: %cmp = icmp ugt i32 %a, %b %c.d = select i1 %cmp, i64 %c, i64 %d -; CHECK: r{{[0-9]+}} <<= 32 -; CHECK-NEXT: r{{[0-9]+}} >>= 32 +; 
CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK-NOT: r{{[0-9]+}} <<= 32 +; CHECK-NOT: r{{[0-9]+}} >>= 32 ; CHECK: if r{{[0-9]+}} {{<|>}} r{{[0-9]+}} goto ret i64 %c.d } @@ -58,8 +59,9 @@ ; CHECK-LABEL: select_u_2: entry: %conv = zext i32 %a to i64 -; CHECK: r{{[0-9]+}} <<= 32 -; CHECK-NEXT: r{{[0-9]+}} >>= 32 +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK-NOT: r{{[0-9]+}} <<= 32 +; CHECK-NOT: r{{[0-9]+}} >>= 32 %cmp = icmp ugt i64 %conv, %b %c.d = select i1 %cmp, i64 %c, i64 %d ret i64 %c.d @@ -84,10 +86,11 @@ %call = tail call i64 bitcast (i64 (...)* @bar to i64 ()*)() #2 %conv = trunc i64 %call to i32 %cmp = icmp ult i32 %conv, 10 -; The shifts can't be optimized out because %call comes from function call -; returning i64 so the high bits might be valid. -; CHECK: r{{[0-9]+}} <<= 32 -; CHECK-NEXT: r{{[0-9]+}} >>= 32 +; %call comes from function call returning i64 so the high bits will need +; to be cleared. +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK-NOT: r{{[0-9]+}} <<= 32 +; CHECK-NOT: r{{[0-9]+}} >>= 32 %b.c = select i1 %cmp, i32 %b, i32 %c ; CHECK: if r{{[0-9]+}} {{<|>}} {{[0-9]+}} goto ret i32 %b.c @@ -100,8 +103,9 @@ ; CHECK-LABEL: inc_p: entry: %idx.ext = zext i32 %a to i64 -; CHECK: r{{[0-9]+}} <<= 32 -; CHECK-NEXT: r{{[0-9]+}} >>= 32 +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK-NOT: r{{[0-9]+}} <<= 32 +; CHECK-NOT: r{{[0-9]+}} >>= 32 %add.ptr = getelementptr inbounds i32, i32* %p, i64 %idx.ext ret i32* %add.ptr } diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-zext.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-zext.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/BPF/32-bit-subreg-zext.ll @@ -0,0 +1,21 @@ +; RUN: llc -O2 -march=bpfel -mattr=+alu32 < %s | FileCheck %s +; RUN: llc -O2 -march=bpfel -mcpu=v3 < %s | FileCheck %s +; RUN: llc -O2 -march=bpfeb -mattr=+alu32 < %s | FileCheck %s +; RUN: llc -O2 -march=bpfeb -mcpu=v3 < %s | FileCheck %s +; +; long zext(unsigned int a) +; { +; long b = a; +; return b; +; } + +; Function Attrs: norecurse nounwind 
+define dso_local i64 @zext(i32 %a) local_unnamed_addr #0 { +entry: + %conv = zext i32 %a to i64 + ; CHECK-NOT: r[[#]] <<= 32 + ; CHECK-NOT: r[[#]] >>= 32 + ret i64 %conv +} + +attributes #0 = { norecurse nounwind }