diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -3405,7 +3405,7 @@
     }
 
     // How many bits are in our mask?
-    uint64_t NumBits = countTrailingOnes(MaskVal);
+    int64_t NumBits = countTrailingOnes(MaskVal);
     Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
 
     if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
@@ -3417,7 +3417,7 @@
         uint64_t StartVal = StartConst->getZExtValue();
         // How many "good" bits do we have left?  "good" is defined here as bits
         // that exist in the original value, not shifted in.
-        uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
+        int64_t GoodBits = Start.getValueSizeInBits() - StartVal;
         if (NumBits > GoodBits) {
           // Do not handle the case where bits have been shifted in. In theory
           // we could handle this, but the cost is likely higher than just
diff --git a/llvm/test/CodeGen/NVPTX/bfe.ll b/llvm/test/CodeGen/NVPTX/bfe.ll
--- a/llvm/test/CodeGen/NVPTX/bfe.ll
+++ b/llvm/test/CodeGen/NVPTX/bfe.ll
@@ -30,3 +30,23 @@
   %val1 = and i32 %val0, 7
   ret i32 %val1
 }
+
+; CHECK-LABEL: bfe3(
+define i32 @bfe3(i32 %a) {
+; CHECK-NOT: bfe.
+; CHECK: shr
+; CHECK: and
+  %val0 = ashr i32 %a, 31
+  %val1 = and i32 %val0, 15
+  ret i32 %val1
+}
+
+; CHECK-LABEL: bfe4(
+define i64 @bfe4(i64 %a) {
+; CHECK-NOT: bfe.
+; CHECK: shr
+; CHECK: and
+  %val0 = ashr i64 %a, 63
+  %val1 = and i64 %val0, 7
+  ret i64 %val1
+}