diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -604,6 +604,12 @@
   DebugLoc DL = MI.getDebugLoc();
 
   MachineRegisterInfo &RegInfo = F->getRegInfo();
+
+  if (!isSigned) {
+    Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
+    BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
+    return PromotedReg0;
+  }
   Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
   Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
   Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -732,8 +732,7 @@
 def : Pat<(i64 (sext GPR32:$src)),
           (SRA_ri (SLL_ri (MOV_32_64 GPR32:$src), 32), 32)>;
 
-def : Pat<(i64 (zext GPR32:$src)),
-          (SRL_ri (SLL_ri (MOV_32_64 GPR32:$src), 32), 32)>;
+def : Pat<(i64 (zext GPR32:$src)), (MOV_32_64 GPR32:$src)>;
 
 // For i64 -> i32 truncation, use the 32-bit subregister directly.
 def : Pat<(i32 (trunc GPR:$src)),
diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-zext.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-zext.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/32-bit-subreg-zext.ll
@@ -0,0 +1,21 @@
+; RUN: llc -O2 -march=bpfel -mattr=+alu32 < %s | FileCheck %s
+; RUN: llc -O2 -march=bpfel -mcpu=v3 < %s | FileCheck %s
+; RUN: llc -O2 -march=bpfeb -mattr=+alu32 < %s | FileCheck %s
+; RUN: llc -O2 -march=bpfeb -mcpu=v3 < %s | FileCheck %s
+;
+; long zext(unsigned int a)
+; {
+;   long b = a;
+;   return b;
+; }
+
+; Function Attrs: norecurse nounwind
+define dso_local i64 @zext(i32 %a) local_unnamed_addr #0 {
+entry:
+  %conv = zext i32 %a to i64
+  ; CHECK-NOT: r[[#]] <<= 32
+  ; CHECK-NOT: r[[#]] >>= 32
+  ret i64 %conv
+}
+
+attributes #0 = { norecurse nounwind }