diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2270,6 +2270,102 @@
   return true;
 }
 
+/// Rewrite \p F and its direct call sites so that they no longer use the
+/// preallocated call protocol.
+///
+/// The `preallocated` attribute is removed from \p F. Each call or invoke of
+/// \p F is recreated without its "preallocated" operand bundle, every
+/// @llvm.call.preallocated.arg() on the bundle's setup token is replaced by an
+/// alloca cast to the intrinsic's result type, and the
+/// @llvm.call.preallocated.setup() call is erased.
+///
+/// Users of \p F that are not calls are skipped; the visible caller only
+/// invokes this when \p F does not have its address taken.
+static void RemovePreallocated(Function *F) {
+  RemoveAttribute(F, Attribute::Preallocated);
+
+  // Cannot modify users() while iterating over it, so make a copy.
+  SmallVector<User *, 4> PreallocatedCalls(F->users());
+  for (User *U : PreallocatedCalls) {
+    CallBase *CB = dyn_cast<CallBase>(U);
+    if (!CB)
+      continue;
+
+    // Create copy of call without "preallocated" operand bundle.
+    SmallVector<OperandBundleDef, 1> OpBundles;
+    CB->getOperandBundlesAsDefs(OpBundles);
+    CallBase *PreallocatedSetup = nullptr;
+    for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) {
+      if (It->getTag() == "preallocated") {
+        PreallocatedSetup = cast<CallBase>(*It->input_begin());
+        OpBundles.erase(It);
+        break;
+      }
+    }
+    assert(PreallocatedSetup && "Did not find preallocated bundle");
+    // The operand of @llvm.call.preallocated.setup() is the number of
+    // preallocated arguments.
+    uint64_t ArgCount =
+        cast<ConstantInt>(PreallocatedSetup->getArgOperand(0))->getZExtValue();
+
+    CallBase *NewCB = nullptr;
+    if (InvokeInst *II = dyn_cast<InvokeInst>(CB)) {
+      NewCB = InvokeInst::Create(II, OpBundles, CB);
+    } else {
+      CallInst *CI = cast<CallInst>(CB);
+      NewCB = CallInst::Create(CI, OpBundles, CB);
+    }
+    // Carry the profile weight over to the replacement call, if there is one.
+    uint64_t W;
+    if (CB->extractProfTotalWeight(W))
+      NewCB->setProfWeight(W);
+    CB->replaceAllUsesWith(NewCB);
+    NewCB->takeName(CB);
+    // CB is deleted here; it must not be dereferenced below.
+    CB->eraseFromParent();
+
+    // Replace @llvm.call.preallocated.arg() with alloca.
+    // @llvm.call.preallocated.arg() can be called with the same index multiple
+    // times, and all of those calls have to refer to the same storage. So
+    // create one alloca (plus cast) per index, right after the
+    // @llvm.call.preallocated.setup() so that it dominates every use.
+    // NOTE(review): nothing releases these allocas after the call; if the call
+    // site sits in a loop this may need stacksave/stackrestore -- confirm.
+    SmallVector<Value *, 2> ArgAllocas(ArgCount);
+    // Cannot modify users() while iterating over it, so make a copy.
+    SmallVector<User *, 4> PreallocatedArgs(PreallocatedSetup->users());
+    for (auto *ArgUser : PreallocatedArgs) {
+      auto *UseCall = cast<CallBase>(ArgUser);
+      assert(UseCall->getCalledFunction()->getIntrinsicID() ==
+                 Intrinsic::call_preallocated_arg &&
+             "preallocated token use was not a llvm.call.preallocated.arg");
+      uint64_t ArgIndex =
+          cast<ConstantInt>(UseCall->getArgOperand(1))->getZExtValue();
+      assert(ArgIndex < ArgCount && "preallocated arg index out of range");
+      Value *&Replacement = ArgAllocas[ArgIndex];
+      if (!Replacement) {
+        auto AddressSpace = UseCall->getType()->getPointerAddressSpace();
+        auto *ArgType = UseCall
+                            ->getAttribute(AttributeList::FunctionIndex,
+                                           Attribute::Preallocated)
+                            .getValueAsType();
+        Instruction *InsertBefore = PreallocatedSetup->getNextNode();
+        auto *Alloca = new AllocaInst(ArgType, AddressSpace,
+                                      UseCall->getName(), InsertBefore);
+        // Cast to the intrinsic's own result type. The original call is
+        // already erased at this point, so do not reach through it for the
+        // context.
+        Replacement = new BitCastInst(Alloca, UseCall->getType(), "pa_cast",
+                                      InsertBefore);
+      }
+      UseCall->replaceAllUsesWith(Replacement);
+      UseCall->eraseFromParent();
+    }
+    // Remove @llvm.call.preallocated.setup().
+    PreallocatedSetup->eraseFromParent();
+  }
+}
+
 static bool
 OptimizeFunctions(Module &M,
                   function_ref<TargetLibraryInfo &(Function &)> GetTLI,
@@ -2340,6 +2401,12 @@
       Changed = true;
     }
 
+    if (F->getAttributes().hasAttrSomewhere(Attribute::Preallocated) &&
+        !F->hasAddressTaken()) {
+      RemovePreallocated(F);
+      Changed = true;
+    }
+
     if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
       NumInternalFunc++;
       TargetTransformInfo &TTI = GetTTI(*F);
diff --git a/llvm/test/Transforms/GlobalOpt/fastcc.ll b/llvm/test/Transforms/GlobalOpt/fastcc.ll
--- a/llvm/test/Transforms/GlobalOpt/fastcc.ll
+++ b/llvm/test/Transforms/GlobalOpt/fastcc.ll
@@ -36,8 +36,7 @@
 }
 
 define internal i32 @preallocated(i32* preallocated(i32) %p) {
-; TODO: handle preallocated:
-; CHECK-NOT-LABEL: define internal fastcc i32 @preallocated(i32* %p)
+; CHECK-LABEL: define internal fastcc i32 @preallocated(i32* %p)
   %rv = load i32, i32* %p
   ret i32 %rv
 }
@@ -50,11 +49,10 @@
   call i32 @j(i32* %m)
   %args = alloca inalloca i32
   call i32 @inalloca(i32* inalloca %args)
-  ; TODO: handle preallocated
-  ;%c = call token @llvm.call.preallocated.setup(i32 1)
-  ;%N = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
-  ;%n = bitcast i8* %N to i32*
-   ;call i32 @preallocated(i32* preallocated(i32) %n) ["preallocated"(token %c)]
+  %c = call token @llvm.call.preallocated.setup(i32 1)
+  %N = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
+  %n = bitcast i8* %N to i32*
+  call i32 @preallocated(i32* preallocated(i32) %n) ["preallocated"(token %c)]
   ret void
 }
 
@@ -68,3 +66,5 @@
 ; CHECK: call coldcc i32 @h
 ; CHECK: call i32 @j
 ; CHECK: call fastcc i32 @inalloca(i32* %args)
+; CHECK-NOT: llvm.call.preallocated
+; CHECK: call fastcc i32 @preallocated(i32* %n)