Index: include/llvm-c/Core.h
===================================================================
--- include/llvm-c/Core.h
+++ include/llvm-c/Core.h
@@ -134,7 +134,12 @@
   LLVMCatchRet       = 62,
   LLVMCatchPad       = 63,
   LLVMCleanupPad     = 64,
-  LLVMCatchSwitch    = 65
+  LLVMCatchSwitch    = 65,
+
+  /* Parallel fork-join operators */
+  LLVMFork           = 66,
+  LLVMHalt           = 67,
+  LLVMJoin           = 68
 } LLVMOpcode;
 
 typedef enum {
Index: include/llvm/Bitcode/LLVMBitCodes.h
===================================================================
--- include/llvm/Bitcode/LLVMBitCodes.h
+++ include/llvm/Bitcode/LLVMBitCodes.h
@@ -462,6 +462,10 @@
   // 53 is unused.
   // 54 is unused.
   FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...]
+
+  FUNC_CODE_INST_FORK = 56, // FORK: [bb#, bb#]
+  FUNC_CODE_INST_HALT = 57, // HALT: [bb#]
+  FUNC_CODE_INST_JOIN = 58  // JOIN: [bb#]
 };
 
 enum UseListCodes {
Index: include/llvm/CodeGen/GlobalISel/IRTranslator.h
===================================================================
--- include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -319,6 +319,15 @@
   bool translateCatchSwitch(const User &U, MachineIRBuilder &MIRBuilder) {
     return false;
   }
+  bool translateFork(const User &U, MachineIRBuilder &MIRBuilder) {
+    return false;
+  }
+  bool translateJoin(const User &U, MachineIRBuilder &MIRBuilder) {
+    return false;
+  }
+  bool translateHalt(const User &U, MachineIRBuilder &MIRBuilder) {
+    return false;
+  }
   bool translateFence(const User &U, MachineIRBuilder &MIRBuilder) {
     return false;
   }
Index: include/llvm/IR/InstVisitor.h
===================================================================
--- include/llvm/IR/InstVisitor.h
+++ include/llvm/IR/InstVisitor.h
@@ -172,6 +172,9 @@
   RetTy visitCleanupReturnInst(CleanupReturnInst &I) { DELEGATE(TerminatorInst);}
   RetTy visitCatchReturnInst(CatchReturnInst &I)  { DELEGATE(TerminatorInst); }
   RetTy visitCatchSwitchInst(CatchSwitchInst &I)  { DELEGATE(TerminatorInst);}
+  RetTy visitForkInst(ForkInst &I)                { DELEGATE(TerminatorInst); }
+  RetTy visitHaltInst(HaltInst &I)                { DELEGATE(TerminatorInst); }
+  RetTy visitJoinInst(JoinInst &I)                { DELEGATE(TerminatorInst); }
   RetTy visitICmpInst(ICmpInst &I)                { DELEGATE(CmpInst);}
   RetTy visitFCmpInst(FCmpInst &I)                { DELEGATE(CmpInst);}
   RetTy visitAllocaInst(AllocaInst &I)            { DELEGATE(UnaryInstruction);}
Index: include/llvm/IR/Instruction.def
===================================================================
--- include/llvm/IR/Instruction.def
+++ include/llvm/IR/Instruction.def
@@ -117,84 +117,87 @@
 HANDLE_TERM_INST  ( 8, CleanupRet    , CleanupReturnInst)
 HANDLE_TERM_INST  ( 9, CatchRet      , CatchReturnInst)
 HANDLE_TERM_INST  (10, CatchSwitch   , CatchSwitchInst)
-  LAST_TERM_INST  (10)
+HANDLE_TERM_INST  (11, Fork          , ForkInst)
+HANDLE_TERM_INST  (12, Halt          , HaltInst)
+HANDLE_TERM_INST  (13, Join          , JoinInst)
+  LAST_TERM_INST  (13)
 
 // Standard binary operators...
- FIRST_BINARY_INST(11)
-HANDLE_BINARY_INST(11, Add  , BinaryOperator)
-HANDLE_BINARY_INST(12, FAdd , BinaryOperator)
-HANDLE_BINARY_INST(13, Sub  , BinaryOperator)
-HANDLE_BINARY_INST(14, FSub , BinaryOperator)
-HANDLE_BINARY_INST(15, Mul  , BinaryOperator)
-HANDLE_BINARY_INST(16, FMul , BinaryOperator)
-HANDLE_BINARY_INST(17, UDiv , BinaryOperator)
-HANDLE_BINARY_INST(18, SDiv , BinaryOperator)
-HANDLE_BINARY_INST(19, FDiv , BinaryOperator)
-HANDLE_BINARY_INST(20, URem , BinaryOperator)
-HANDLE_BINARY_INST(21, SRem , BinaryOperator)
-HANDLE_BINARY_INST(22, FRem , BinaryOperator)
+ FIRST_BINARY_INST(14)
+HANDLE_BINARY_INST(14, Add  , BinaryOperator)
+HANDLE_BINARY_INST(15, FAdd , BinaryOperator)
+HANDLE_BINARY_INST(16, Sub  , BinaryOperator)
+HANDLE_BINARY_INST(17, FSub , BinaryOperator)
+HANDLE_BINARY_INST(18, Mul  , BinaryOperator)
+HANDLE_BINARY_INST(19, FMul , BinaryOperator)
+HANDLE_BINARY_INST(20, UDiv , BinaryOperator)
+HANDLE_BINARY_INST(21, SDiv , BinaryOperator)
+HANDLE_BINARY_INST(22, FDiv , BinaryOperator)
+HANDLE_BINARY_INST(23, URem , BinaryOperator)
+HANDLE_BINARY_INST(24, SRem , BinaryOperator)
+HANDLE_BINARY_INST(25, FRem , BinaryOperator)
 
 // Logical operators (integer operands)
-HANDLE_BINARY_INST(23, Shl  , BinaryOperator) // Shift left  (logical)
-HANDLE_BINARY_INST(24, LShr , BinaryOperator) // Shift right (logical)
-HANDLE_BINARY_INST(25, AShr , BinaryOperator) // Shift right (arithmetic)
-HANDLE_BINARY_INST(26, And  , BinaryOperator)
-HANDLE_BINARY_INST(27, Or   , BinaryOperator)
-HANDLE_BINARY_INST(28, Xor  , BinaryOperator)
-  LAST_BINARY_INST(28)
+HANDLE_BINARY_INST(26, Shl  , BinaryOperator) // Shift left  (logical)
+HANDLE_BINARY_INST(27, LShr , BinaryOperator) // Shift right (logical)
+HANDLE_BINARY_INST(28, AShr , BinaryOperator) // Shift right (arithmetic)
+HANDLE_BINARY_INST(29, And  , BinaryOperator)
+HANDLE_BINARY_INST(30, Or   , BinaryOperator)
+HANDLE_BINARY_INST(31, Xor  , BinaryOperator)
+  LAST_BINARY_INST(31)
 
 // Memory operators...
- FIRST_MEMORY_INST(29)
-HANDLE_MEMORY_INST(29, Alloca, AllocaInst)  // Stack management
-HANDLE_MEMORY_INST(30, Load  , LoadInst  )  // Memory manipulation instrs
-HANDLE_MEMORY_INST(31, Store , StoreInst )
-HANDLE_MEMORY_INST(32, GetElementPtr, GetElementPtrInst)
-HANDLE_MEMORY_INST(33, Fence , FenceInst )
-HANDLE_MEMORY_INST(34, AtomicCmpXchg , AtomicCmpXchgInst )
-HANDLE_MEMORY_INST(35, AtomicRMW , AtomicRMWInst )
-  LAST_MEMORY_INST(35)
+ FIRST_MEMORY_INST(32)
+HANDLE_MEMORY_INST(32, Alloca, AllocaInst)  // Stack management
+HANDLE_MEMORY_INST(33, Load  , LoadInst  )  // Memory manipulation instrs
+HANDLE_MEMORY_INST(34, Store , StoreInst )
+HANDLE_MEMORY_INST(35, GetElementPtr, GetElementPtrInst)
+HANDLE_MEMORY_INST(36, Fence , FenceInst )
+HANDLE_MEMORY_INST(37, AtomicCmpXchg , AtomicCmpXchgInst )
+HANDLE_MEMORY_INST(38, AtomicRMW , AtomicRMWInst )
+  LAST_MEMORY_INST(38)
 
 // Cast operators ...
 // NOTE: The order matters here because CastInst::isEliminableCastPair
 // NOTE: (see Instructions.cpp) encodes a table based on this ordering.
- FIRST_CAST_INST(36)
-HANDLE_CAST_INST(36, Trunc   , TruncInst   )  // Truncate integers
-HANDLE_CAST_INST(37, ZExt    , ZExtInst    )  // Zero extend integers
-HANDLE_CAST_INST(38, SExt    , SExtInst    )  // Sign extend integers
-HANDLE_CAST_INST(39, FPToUI  , FPToUIInst  )  // floating point -> UInt
-HANDLE_CAST_INST(40, FPToSI  , FPToSIInst  )  // floating point -> SInt
-HANDLE_CAST_INST(41, UIToFP  , UIToFPInst  )  // UInt -> floating point
-HANDLE_CAST_INST(42, SIToFP  , SIToFPInst  )  // SInt -> floating point
-HANDLE_CAST_INST(43, FPTrunc , FPTruncInst )  // Truncate floating point
-HANDLE_CAST_INST(44, FPExt   , FPExtInst   )  // Extend floating point
-HANDLE_CAST_INST(45, PtrToInt, PtrToIntInst)  // Pointer -> Integer
-HANDLE_CAST_INST(46, IntToPtr, IntToPtrInst)  // Integer -> Pointer
-HANDLE_CAST_INST(47, BitCast , BitCastInst )  // Type cast
-HANDLE_CAST_INST(48, AddrSpaceCast, AddrSpaceCastInst)  // addrspace cast
-  LAST_CAST_INST(48)
-
- FIRST_FUNCLETPAD_INST(49)
-HANDLE_FUNCLETPAD_INST(49, CleanupPad, CleanupPadInst)
-HANDLE_FUNCLETPAD_INST(50, CatchPad  , CatchPadInst)
-  LAST_FUNCLETPAD_INST(50)
+ FIRST_CAST_INST(39)
+HANDLE_CAST_INST(39, Trunc   , TruncInst   )  // Truncate integers
+HANDLE_CAST_INST(40, ZExt    , ZExtInst    )  // Zero extend integers
+HANDLE_CAST_INST(41, SExt    , SExtInst    )  // Sign extend integers
+HANDLE_CAST_INST(42, FPToUI  , FPToUIInst  )  // floating point -> UInt
+HANDLE_CAST_INST(43, FPToSI  , FPToSIInst  )  // floating point -> SInt
+HANDLE_CAST_INST(44, UIToFP  , UIToFPInst  )  // UInt -> floating point
+HANDLE_CAST_INST(45, SIToFP  , SIToFPInst  )  // SInt -> floating point
+HANDLE_CAST_INST(46, FPTrunc , FPTruncInst )  // Truncate floating point
+HANDLE_CAST_INST(47, FPExt   , FPExtInst   )  // Extend floating point
+HANDLE_CAST_INST(48, PtrToInt, PtrToIntInst)  // Pointer -> Integer
+HANDLE_CAST_INST(49, IntToPtr, IntToPtrInst)  // Integer -> Pointer
+HANDLE_CAST_INST(50, BitCast , BitCastInst )  // Type cast
+HANDLE_CAST_INST(51, AddrSpaceCast, AddrSpaceCastInst)  // addrspace cast
+  LAST_CAST_INST(51)
+
+ FIRST_FUNCLETPAD_INST(52)
+HANDLE_FUNCLETPAD_INST(52, CleanupPad, CleanupPadInst)
+HANDLE_FUNCLETPAD_INST(53, CatchPad  , CatchPadInst)
+  LAST_FUNCLETPAD_INST(53)
 
 // Other operators...
- FIRST_OTHER_INST(51)
-HANDLE_OTHER_INST(51, ICmp   , ICmpInst   )  // Integer comparison instruction
-HANDLE_OTHER_INST(52, FCmp   , FCmpInst   )  // Floating point comparison instr.
-HANDLE_OTHER_INST(53, PHI    , PHINode    )  // PHI node instruction
-HANDLE_OTHER_INST(54, Call   , CallInst   )  // Call a function
-HANDLE_OTHER_INST(55, Select , SelectInst )  // select instruction
-HANDLE_OTHER_INST(56, UserOp1, Instruction)  // May be used internally in a pass
-HANDLE_OTHER_INST(57, UserOp2, Instruction)  // Internal to passes only
-HANDLE_OTHER_INST(58, VAArg  , VAArgInst  )  // vaarg instruction
-HANDLE_OTHER_INST(59, ExtractElement, ExtractElementInst)// extract from vector
-HANDLE_OTHER_INST(60, InsertElement, InsertElementInst)  // insert into vector
-HANDLE_OTHER_INST(61, ShuffleVector, ShuffleVectorInst)  // shuffle two vectors.
-HANDLE_OTHER_INST(62, ExtractValue, ExtractValueInst)// extract from aggregate
-HANDLE_OTHER_INST(63, InsertValue, InsertValueInst)  // insert into aggregate
-HANDLE_OTHER_INST(64, LandingPad, LandingPadInst)  // Landing pad instruction.
-  LAST_OTHER_INST(64)
+ FIRST_OTHER_INST(54)
+HANDLE_OTHER_INST(54, ICmp   , ICmpInst   )  // Integer comparison instruction
+HANDLE_OTHER_INST(55, FCmp   , FCmpInst   )  // Floating point comparison instr.
+HANDLE_OTHER_INST(56, PHI    , PHINode    )  // PHI node instruction
+HANDLE_OTHER_INST(57, Call   , CallInst   )  // Call a function
+HANDLE_OTHER_INST(58, Select , SelectInst )  // select instruction
+HANDLE_OTHER_INST(59, UserOp1, Instruction)  // May be used internally in a pass
+HANDLE_OTHER_INST(60, UserOp2, Instruction)  // Internal to passes only
+HANDLE_OTHER_INST(61, VAArg  , VAArgInst  )  // vaarg instruction
+HANDLE_OTHER_INST(62, ExtractElement, ExtractElementInst)// extract from vector
+HANDLE_OTHER_INST(63, InsertElement, InsertElementInst)  // insert into vector
+HANDLE_OTHER_INST(64, ShuffleVector, ShuffleVectorInst)  // shuffle two vectors.
+HANDLE_OTHER_INST(65, ExtractValue, ExtractValueInst)// extract from aggregate
+HANDLE_OTHER_INST(66, InsertValue, InsertValueInst)  // insert into aggregate
+HANDLE_OTHER_INST(67, LandingPad, LandingPadInst)  // Landing pad instruction.
+  LAST_OTHER_INST(67)
 
 #undef  FIRST_TERM_INST
 #undef HANDLE_TERM_INST
Index: include/llvm/IR/Instructions.h
===================================================================
--- include/llvm/IR/Instructions.h
+++ include/llvm/IR/Instructions.h
@@ -4508,6 +4508,226 @@
 };
 
 //===----------------------------------------------------------------------===//
+//                                 ForkInst Class
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------------
+/// This class represents a fork terminator instruction.
+  class ForkInst : public TerminatorInst {
+    /// Ops list - successor 0 (Op<-1>) is forked, 1 (Op<-2>) is continue.
+    ForkInst(const ForkInst &FI);
+    void AssertOK();
+    // ForkInst constructors (where {F, C} are blocks):
+    // ForkInst(BB *F, BB *C)          - 'fork F, C'
+    // ForkInst(BB *F, BB *C, Inst *I) - 'fork F, C', insert before I
+    // ForkInst(BB *F, BB *C, BB *I)   - 'fork F, C', insert at end of I
+    ForkInst(BasicBlock *Forked, BasicBlock *Continue,
+             Instruction *InsertBefore = nullptr);
+    ForkInst(BasicBlock *Forked, BasicBlock *Continue,
+             BasicBlock *InsertAtEnd);
+
+  protected:
+    // Note: Instruction needs to be a friend here to call cloneImpl.
+    friend class Instruction;
+    ForkInst *cloneImpl() const;
+
+  public:
+    static ForkInst *Create(BasicBlock *Forked, BasicBlock *Continue,
+                            Instruction *InsertBefore = nullptr) {
+      return new(2) ForkInst(Forked, Continue, InsertBefore);
+    }
+    static ForkInst *Create(BasicBlock *Forked, BasicBlock *Continue,
+                            BasicBlock *InsertAtEnd) {
+      return new(2) ForkInst(Forked, Continue, InsertAtEnd);
+    }
+
+    /// Transparently provide more efficient getOperand methods.
+    DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+    unsigned getNumSuccessors() const { return 2; }
+
+    BasicBlock *getSuccessor(unsigned i) const {
+      assert(i < 2 && "Successor # out of range for fork!");
+      return cast_or_null<BasicBlock>((&Op<-1>() - i)->get());
+    }
+
+    void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
+      assert(idx < 2 && "Successor # out of range for fork!");
+      *(&Op<-1>() - idx) = (Value*)NewSucc;
+    }
+
+    /// Swap the successors of this fork instruction.
+    ///
+    /// Swaps the successors of the fork instruction. This also swaps any
+    /// branch weight metadata associated with the instruction so that it
+    /// continues to map correctly to each operand.
+    void swapSuccessors();
+
+    // Methods to support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const Instruction *I) {
+      return (I->getOpcode() == Instruction::Fork);
+    }
+    static inline bool classof(const Value *V) {
+      return isa<Instruction>(V) && classof(cast<Instruction>(V));
+    }
+
+    inline BasicBlock* getForked() const { return getSuccessor(0); }
+    inline BasicBlock* getContinue() const { return getSuccessor(1); }
+  private:
+    BasicBlock *getSuccessorV(unsigned idx) const override;
+    unsigned getNumSuccessorsV() const override;
+    void setSuccessorV(unsigned idx, BasicBlock *B) override;
+  };
+
+  template<>
+  struct OperandTraits<ForkInst> : public  VariadicOperandTraits<ForkInst, 1> {
+  };
+
+  DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ForkInst, Value)
+
+  //===----------------------------------------------------------------------===//
+  //                                 HaltInst Class
+  //===----------------------------------------------------------------------===//
+
+  //===---------------------------------------------------------------------------
+  /// HaltInst - Halt Instruction. This instruction terminates a forked
+  /// sub-CFG. Its single successor operand, ForkContinue, is the continue
+  /// block of the fork instruction corresponding to this halt
+  /// (see getForkContinue()).
+  class HaltInst : public TerminatorInst {
+    HaltInst(const HaltInst &HI);
+    void AssertOK();
+    // HaltInst constructors (where C is the context and F is a block):
+    // HaltInst(Ctx &C, BB *F)          - 'halt F'
+    // HaltInst(Ctx &C, BB *F, Inst *I) - 'halt F'    insert before I
+    // HaltInst(Ctx &C, BB *F, BB *I)   - 'halt F'    insert at end
+    explicit HaltInst(LLVMContext &C, BasicBlock *ForkContinue,
+                      Instruction *InsertBefore = nullptr);
+    HaltInst(LLVMContext &C, BasicBlock *ForkContinue,
+             BasicBlock *InsertAtEnd);
+  protected:
+    // Note: Instruction needs to be a friend here to call cloneImpl.
+    friend class Instruction;
+    HaltInst *cloneImpl() const;
+
+  public:
+    static HaltInst *Create(LLVMContext &C, BasicBlock *ForkContinue,
+                            Instruction *InsertBefore = nullptr) {
+      return new(1) HaltInst(C, ForkContinue, InsertBefore);
+    }
+    static HaltInst *Create(LLVMContext &C, BasicBlock *ForkContinue,
+                            BasicBlock *InsertAtEnd) {
+      return new(1) HaltInst(C, ForkContinue, InsertAtEnd);
+    }
+
+    /// Transparently provide more efficient getOperand methods.
+    DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+    unsigned getNumSuccessors() const { return 1; }
+
+    BasicBlock *getSuccessor(unsigned i) const {
+      assert(i == 0 && "Successor # out of range for halt!");
+      return cast_or_null<BasicBlock>((&Op<-1>() - i)->get());
+    }
+
+    void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
+      assert(idx == 0 && "Successor # out of range for halt!");
+      *(&Op<-1>() - idx) = (Value*)NewSucc;
+    }
+
+    BasicBlock *getForkContinue() const {
+      return cast_or_null<BasicBlock>((&Op<-1>())->get());
+    }
+
+    // Methods to support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const Instruction *I) {
+      return (I->getOpcode() == Instruction::Halt);
+    }
+    static inline bool classof(const Value *V) {
+      return isa<Instruction>(V) && classof(cast<Instruction>(V));
+    }
+
+  private:
+    BasicBlock *getSuccessorV(unsigned idx) const override;
+    unsigned getNumSuccessorsV() const override;
+    void setSuccessorV(unsigned idx, BasicBlock *B) override;
+ };
+
+  template<>
+  struct OperandTraits<HaltInst> : public VariadicOperandTraits<HaltInst, 1> {
+  };
+
+  DEFINE_TRANSPARENT_OPERAND_ACCESSORS(HaltInst, Value)
+
+  //===----------------------------------------------------------------------===//
+  //                                 JoinInst Class
+  //===----------------------------------------------------------------------===//
+
+  //===---------------------------------------------------------------------------
+  /// JoinInst - Join Instruction
+  class JoinInst : public TerminatorInst {
+    /// Ops list - A join is like an unconditional branch to its continuation.
+    JoinInst(const JoinInst &SI);
+    void AssertOK();
+    // JoinInst constructors (where C is a block):
+    // JoinInst(BB *C)           - 'join C'
+    // JoinInst(BB *C, Inst *I)  - 'join C'    insert before I
+    // JoinInst(BB *C, BB *I)    - 'join C'    insert at end
+    explicit JoinInst(BasicBlock *Continue, Instruction *InsertBefore = nullptr);
+    JoinInst(BasicBlock *Continue, BasicBlock *InsertAtEnd);
+  protected:
+    // Note: Instruction needs to be a friend here to call cloneImpl.
+    friend class Instruction;
+    JoinInst *cloneImpl() const;
+
+  public:
+    static JoinInst *Create(BasicBlock *Continue, Instruction *InsertBefore = nullptr) {
+      return new(1) JoinInst(Continue, InsertBefore);
+    }
+    static JoinInst *Create(BasicBlock *Continue, BasicBlock *InsertAtEnd) {
+      return new(1) JoinInst(Continue, InsertAtEnd);
+    }
+
+    /// Transparently provide more efficient getOperand methods.
+    DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+    unsigned getNumSuccessors() const { return 1; }
+
+    BasicBlock *getSuccessor(unsigned i) const {
+      assert(i == 0 && "Successor # out of range for join!");
+      return cast<BasicBlock>((&Op<-1>() - i)->get());
+    }
+
+    void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
+      assert(idx == 0 && "Successor # out of range for join!");
+      *(&Op<-1>() - idx) = (Value*)NewSucc;
+    }
+
+    BasicBlock *getContinue() const {
+      return getSuccessor(0);
+    }
+
+    // Methods to support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const Instruction *I) {
+      return (I->getOpcode() == Instruction::Join);
+    }
+    static inline bool classof(const Value *V) {
+      return isa<Instruction>(V) && classof(cast<Instruction>(V));
+    }
+
+  private:
+    BasicBlock *getSuccessorV(unsigned idx) const override;
+    unsigned getNumSuccessorsV() const override;
+    void setSuccessorV(unsigned idx, BasicBlock *B) override;
+  };
+
+  template<>
+  struct OperandTraits<JoinInst> : public VariadicOperandTraits<JoinInst, 1> {
+  };
+
+  DEFINE_TRANSPARENT_OPERAND_ACCESSORS(JoinInst, Value)
+
+//===----------------------------------------------------------------------===//
 //                                 TruncInst Class
 //===----------------------------------------------------------------------===//
 
Index: lib/AsmParser/LLLexer.cpp
===================================================================
--- lib/AsmParser/LLLexer.cpp
+++ lib/AsmParser/LLLexer.cpp
@@ -763,6 +763,9 @@
   INSTKEYWORD(invoke,      Invoke);
   INSTKEYWORD(resume,      Resume);
   INSTKEYWORD(unreachable, Unreachable);
+  INSTKEYWORD(fork,        Fork);
+  INSTKEYWORD(halt,        Halt);
+  INSTKEYWORD(join,        Join);
 
   INSTKEYWORD(alloca,      Alloca);
   INSTKEYWORD(load,        Load);
Index: lib/AsmParser/LLParser.h
===================================================================
--- lib/AsmParser/LLParser.h
+++ lib/AsmParser/LLParser.h
@@ -465,6 +465,9 @@
 
     bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
     bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
+    bool ParseFork(Instruction *&Inst, PerFunctionState &PFS);
+    bool ParseHalt(Instruction *&Inst, PerFunctionState &PFS);
+    bool ParseJoin(Instruction *&Inst, PerFunctionState &PFS);
     bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
     bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
     bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
Index: lib/AsmParser/LLParser.cpp
===================================================================
--- lib/AsmParser/LLParser.cpp
+++ lib/AsmParser/LLParser.cpp
@@ -5025,6 +5025,9 @@
   case lltok::kw_catchswitch: return ParseCatchSwitch(Inst, PFS);
   case lltok::kw_catchpad:    return ParseCatchPad(Inst, PFS);
   case lltok::kw_cleanuppad:  return ParseCleanupPad(Inst, PFS);
+  case lltok::kw_fork:        return ParseFork(Inst, PFS);
+  case lltok::kw_halt:        return ParseHalt(Inst, PFS);
+  case lltok::kw_join:        return ParseJoin(Inst, PFS);
   // Binary Operators.
   case lltok::kw_add:
   case lltok::kw_sub:
@@ -5224,6 +5227,47 @@
   return false;
 }
 
+/// ParseFork
+///    ::= 'fork' TypeAndValue ',' TypeAndValue
+/// Parses the forked block, then the continue block; returns true on error.
+bool LLParser::ParseFork(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy ForkedLoc, ContinueLoc;
+  BasicBlock *Forked, *Continue;
+  if (ParseTypeAndBasicBlock(Forked, ForkedLoc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after forked destination") ||
+      ParseTypeAndBasicBlock(Continue, ContinueLoc, PFS))
+    return true;
+
+  Inst = ForkInst::Create(Forked, Continue);
+  return false;
+}
+
+/// ParseHalt - the block operand is passed on as the halt's ForkContinue.
+///   ::= 'halt' TypeAndValue
+bool LLParser::ParseHalt(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy Loc;
+  BasicBlock *Op;
+
+  if (ParseTypeAndBasicBlock(Op, Loc, PFS))
+    return true;
+
+  Inst = HaltInst::Create(Context, Op);
+  return false;
+}
+
+/// ParseJoin - the block operand is passed on as the join's Continue block.
+///   ::= 'join' TypeAndValue
+bool LLParser::ParseJoin(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy Loc;
+  BasicBlock *Op;
+
+  if (ParseTypeAndBasicBlock(Op, Loc, PFS))
+    return true;
+
+  Inst = JoinInst::Create(Op);
+  return false;
+}
+
 /// ParseSwitch
 ///  Instruction
 ///    ::= 'switch' TypeAndValue ',' TypeAndValue '[' JumpTable ']'
Index: lib/AsmParser/LLToken.h
===================================================================
--- lib/AsmParser/LLToken.h
+++ lib/AsmParser/LLToken.h
@@ -325,6 +325,11 @@
   kw_insertvalue,
   kw_blockaddress,
 
+  // Parallel IR types
+  kw_fork,
+  kw_halt,
+  kw_join,
+
   // Metadata types.
   kw_distinct,
 
Index: lib/Bitcode/Reader/BitcodeReader.cpp
===================================================================
--- lib/Bitcode/Reader/BitcodeReader.cpp
+++ lib/Bitcode/Reader/BitcodeReader.cpp
@@ -3909,6 +3909,45 @@
       I = new UnreachableInst(Context);
       InstructionList.push_back(I);
       break;
+    case bitc::FUNC_CODE_INST_FORK: { // FORK: [bb#, bb#]
+      if (Record.size() != 2)
+        return error("Invalid record");
+      BasicBlock *Forked = getBasicBlock(Record[0]);
+      if (!Forked)
+        return error("Invalid record");
+
+      BasicBlock *Continue = getBasicBlock(Record[1]);
+      if (!Continue)
+        return error("Invalid record");
+
+      I = ForkInst::Create(Forked, Continue);
+      InstructionList.push_back(I);
+      break;
+    }
+    case bitc::FUNC_CODE_INST_HALT: { // HALT: [bb#]
+      if (Record.size() != 1)
+        return error("Invalid record");
+
+      BasicBlock *ForkContinue = getBasicBlock(Record[0]);
+      if (!ForkContinue)
+        return error("Invalid record");
+
+      I = HaltInst::Create(Context, ForkContinue);
+      InstructionList.push_back(I);
+      break;
+    }
+    case bitc::FUNC_CODE_INST_JOIN: { // JOIN: [bb#]
+      if (Record.size() != 1)
+        return error("Invalid record");
+
+      BasicBlock *Continue = getBasicBlock(Record[0]);
+      if (!Continue)
+        return error("Invalid record");
+
+      I = JoinInst::Create(Continue);
+      InstructionList.push_back(I);
+      break;
+    }
     case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...]
       if (Record.size() < 1 || ((Record.size()-1)&1))
         return error("Invalid record");
Index: lib/Bitcode/Writer/BitcodeWriter.cpp
===================================================================
--- lib/Bitcode/Writer/BitcodeWriter.cpp
+++ lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -2654,7 +2654,25 @@
     Code = bitc::FUNC_CODE_INST_UNREACHABLE;
     AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV;
     break;
-
+  case Instruction::Fork: { // FORK: [bb#, bb#], same ID form as halt/join
+    Code = bitc::FUNC_CODE_INST_FORK;
+    const ForkInst &FI = cast<ForkInst>(I);
+    Vals.push_back(VE.getValueID(FI.getSuccessor(0))); // forked
+    Vals.push_back(VE.getValueID(FI.getSuccessor(1))); // continue
+    break;
+  }
+  case Instruction::Halt: {
+    Code = bitc::FUNC_CODE_INST_HALT;
+    const HaltInst &HI = cast<HaltInst>(I);
+    Vals.push_back(VE.getValueID(HI.getSuccessor(0)));
+    break;
+  }
+  case Instruction::Join: {
+    Code = bitc::FUNC_CODE_INST_JOIN;
+    const JoinInst &JI = cast<JoinInst>(I);
+    Vals.push_back(VE.getValueID(JI.getSuccessor(0)));
+    break;
+  }
   case Instruction::PHI: {
     const PHINode &PN = cast<PHINode>(I);
     Code = bitc::FUNC_CODE_INST_PHI;
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -40,6 +40,7 @@
 class ExtractElementInst;
 class ExtractValueInst;
 class FCmpInst;
+class ForkInst;
 class FPExtInst;
 class FPToSIInst;
 class FPToUIInst;
@@ -48,6 +49,7 @@
 class FunctionLoweringInfo;
 class GetElementPtrInst;
 class GCFunctionInfo;
+class HaltInst;
 class ICmpInst;
 class IntToPtrInst;
 class IndirectBrInst;
@@ -55,6 +57,7 @@
 class InsertElementInst;
 class InsertValueInst;
 class Instruction;
+class JoinInst;
 class LoadInst;
 class MachineBasicBlock;
 class MachineInstr;
@@ -795,6 +798,9 @@
   void visitCatchRet(const CatchReturnInst &I);
   void visitCatchPad(const CatchPadInst &I);
   void visitCleanupPad(const CleanupPadInst &CPI);
+  void visitFork(const ForkInst &I);
+  void visitHalt(const HaltInst &I);
+  void visitJoin(const JoinInst &I);
 
   BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
                                        const MachineBasicBlock *Dst) const;
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2448,6 +2448,60 @@
         DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
 }
 
+void SelectionDAGBuilder::visitFork(const llvm::ForkInst &I) {
+  MachineBasicBlock *ForkMBB = FuncInfo.MBB;
+
+  MachineBasicBlock *Forked = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+  // Update machine-CFG edges; only successor 0 (forked) gets an edge here.
+  ForkMBB->addSuccessor(Forked);
+
+  // If this is not a fall-through branch or optimizations are switched off,
+  // emit the branch.
+  if (Forked != NextBlock(ForkMBB) || TM.getOptLevel() == CodeGenOpt::None)
+    DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
+                            MVT::Other, getControlRoot(),
+                            DAG.getBasicBlock(Forked)));
+
+  return;
+}
+
+void SelectionDAGBuilder::visitHalt(const llvm::HaltInst &I) {
+  MachineBasicBlock *HaltMBB = FuncInfo.MBB;
+
+  MachineBasicBlock *Continue = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+  // Update machine-CFG edges.
+  HaltMBB->addSuccessor(Continue);
+
+  // If this is not a fall-through branch or optimizations are switched off,
+  // emit the branch.
+  if (Continue != NextBlock(HaltMBB) || TM.getOptLevel() == CodeGenOpt::None)
+    DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
+                            MVT::Other, getControlRoot(),
+                            DAG.getBasicBlock(Continue)));
+
+  return;
+}
+
+void SelectionDAGBuilder::visitJoin(const llvm::JoinInst &I) {
+  MachineBasicBlock *JoinMBB = FuncInfo.MBB;
+
+  MachineBasicBlock *Continue = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+  // Update machine-CFG edges.
+  JoinMBB->addSuccessor(Continue);
+
+  // If this is not a fall-through branch or optimizations are switched off,
+  // emit the branch.
+  if (Continue != NextBlock(JoinMBB) || TM.getOptLevel() == CodeGenOpt::None)
+    DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
+                            MVT::Other, getControlRoot(),
+                            DAG.getBasicBlock(Continue)));
+
+  return;
+}
+
 void SelectionDAGBuilder::visitFSub(const User &I) {
   // -0.0 - X --> fneg
   Type *Ty = I.getType();
Index: lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- lib/CodeGen/TargetLoweringBase.cpp
+++ lib/CodeGen/TargetLoweringBase.cpp
@@ -915,7 +915,7 @@
     setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
 
     setOperationAction(ISD::BITREVERSE, VT, Expand);
-    
+
     // These library functions default to expand.
     setOperationAction(ISD::FROUND, VT, Expand);
 
@@ -1207,7 +1207,7 @@
   // STATEPOINT Deopt Spill - live-through, read only, indirect
   // STATEPOINT Deopt Alloca - live-through, read only, direct
   // (We're currently conservative and mark the deopt slots read/write in
-  // practice.) 
+  // practice.)
   // STATEPOINT GC Spill - live-through, read/write, indirect
   // STATEPOINT GC Alloca - live-through, read/write, direct
   // The live-in vs live-through is handled already (the live through ones are
@@ -1660,7 +1660,7 @@
       *Fast = true;
     return true;
   }
-  
+
   // This is a misaligned access.
   return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast);
 }
@@ -1692,6 +1692,9 @@
   case CatchPad:       return 0;
   case CatchSwitch:    return 0;
   case CleanupPad:     return 0;
+  case Fork:           return 0;
+  case Join:           return 0;
+  case Halt:           return 0;
   case Add:            return ISD::ADD;
   case FAdd:           return ISD::FADD;
   case Sub:            return ISD::SUB;
Index: lib/IR/Instruction.cpp
===================================================================
--- lib/IR/Instruction.cpp
+++ lib/IR/Instruction.cpp
@@ -258,6 +258,9 @@
   case CatchRet: return "catchret";
   case CatchPad: return "catchpad";
   case CatchSwitch: return "catchswitch";
+  case Fork:        return "fork";
+  case Halt:        return "halt";
+  case Join:        return "join";
 
   // Standard binary operators...
   case Add: return "add";
Index: lib/IR/Instructions.cpp
===================================================================
--- lib/IR/Instructions.cpp
+++ lib/IR/Instructions.cpp
@@ -1098,6 +1098,158 @@
 }
 
 //===----------------------------------------------------------------------===//
+//                        ForkInst Implementation
+//===----------------------------------------------------------------------===//
+
+// Debug-build hook called by the constructors; nothing is verified for fork.
+void ForkInst::AssertOK() {}
+
+ForkInst::ForkInst(BasicBlock *Forked, BasicBlock *Continue,
+                   Instruction *InsertBefore)
+  : TerminatorInst(Type::getVoidTy(Forked->getContext()), Instruction::Fork,
+                   OperandTraits<ForkInst>::op_end(this) - 2, 2, InsertBefore) {
+  Op<-1>() = Forked;   // last of the two operand slots
+  Op<-2>() = Continue; // first of the two operand slots
+#ifndef NDEBUG
+  AssertOK();
+#endif
+}
+
+ForkInst::ForkInst(const ForkInst &FI)
+  : TerminatorInst(Type::getVoidTy(FI.getContext()), Instruction::Fork,
+                   OperandTraits<ForkInst>::op_end(this) - FI.getNumOperands(),
+                   FI.getNumOperands()) {
+  assert(FI.getNumOperands() == 2 && "Fork must have 2 operands!");
+  Op<-1>() = FI.Op<-1>(); // copy both successor operands, checked above
+  Op<-2>() = FI.Op<-2>();
+  SubclassOptionalData = FI.SubclassOptionalData;
+}
+
+// Exchanges the two successors and, if present, the matching !prof weights.
+void ForkInst::swapSuccessors() {
+  Op<-1>().swap(Op<-2>());
+
+  // Only rewrite !prof when it has the structural shape {name, weight, weight}.
+  MDNode *ProfileData = getMetadata(LLVMContext::MD_prof);
+  if (!ProfileData || ProfileData->getNumOperands() != 3)
+    return;
+  // Operand 0 is the name and stays first; the two weights trade places.
+  Metadata *Ops[] = {ProfileData->getOperand(0), ProfileData->getOperand(2),
+                     ProfileData->getOperand(1)};
+  setMetadata(LLVMContext::MD_prof,
+              MDNode::get(ProfileData->getContext(), Ops));
+}
+
+BasicBlock *ForkInst::getSuccessorV(unsigned idx) const {
+  return getSuccessor(idx); // plain forward; idx is not range-checked here
+}
+unsigned ForkInst::getNumSuccessorsV() const {
+  return getNumSuccessors(); // plain forward to getNumSuccessors()
+}
+void ForkInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+  assert(idx < 2 && "Successor # out of range for fork!");
+  setSuccessor(idx, B); // only slots 0 and 1 pass the assert above
+}
+
+//===----------------------------------------------------------------------===//
+//                        HaltInst Implementation
+//===----------------------------------------------------------------------===//
+
+// Debug-build hook called by the constructors; nothing is verified for halt.
+void HaltInst::AssertOK() {}
+
+// Halt with ForkContinue as its only operand; InsertBefore goes to the base.
+HaltInst::HaltInst(LLVMContext &C, BasicBlock *ForkContinue,
+                   Instruction *InsertBefore)
+  : TerminatorInst(Type::getVoidTy(C), Instruction::Halt,
+                   OperandTraits<HaltInst>::op_end(this) - 1, 1, InsertBefore) {
+  Op<-1>() = ForkContinue;
+#ifndef NDEBUG
+  AssertOK();
+#endif
+}
+// Halt with ForkContinue as its only operand; InsertAtEnd goes to the base.
+HaltInst::HaltInst(LLVMContext &C, BasicBlock *ForkContinue,
+                   BasicBlock *InsertAtEnd)
+  : TerminatorInst(Type::getVoidTy(C), Instruction::Halt,
+                   OperandTraits<HaltInst>::op_end(this) - 1, 1, InsertAtEnd) {
+  Op<-1>() = ForkContinue;
+#ifndef NDEBUG
+  AssertOK();
+#endif
+}
+
+HaltInst::HaltInst(const HaltInst &HI)
+  : TerminatorInst(Type::getVoidTy(HI.getContext()), Instruction::Halt,
+                   OperandTraits<HaltInst>::op_end(this) - HI.getNumOperands(),
+                   HI.getNumOperands()) {
+  assert(HI.getNumOperands() == 1 && "Halt must have 1 operand!");
+  Op<-1>() = HI.Op<-1>(); // copy the single operand, checked above
+  SubclassOptionalData = HI.SubclassOptionalData;
+}
+
+unsigned HaltInst::getNumSuccessorsV() const {
+  return getNumSuccessors(); // plain forward to getNumSuccessors()
+}
+
+BasicBlock *HaltInst::getSuccessorV(unsigned idx) const {
+  return getSuccessor(idx); // plain forward; idx is not range-checked here
+}
+
+void HaltInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+  assert(idx == 0 && "Halt has exactly one successor!");
+  setSuccessor(idx, B); // slot 0 is the only index that passes the assert
+}
+
+//===----------------------------------------------------------------------===//
+//                        JoinInst Implementation
+//===----------------------------------------------------------------------===//
+
+// Debug-build hook called by the constructors; nothing is verified for join.
+void JoinInst::AssertOK() {}
+
+// Join with Continue as its only operand; InsertBefore goes to the base.
+JoinInst::JoinInst(BasicBlock *Continue, Instruction *InsertBefore)
+  : TerminatorInst(Type::getVoidTy(Continue->getContext()), Instruction::Join,
+                   OperandTraits<JoinInst>::op_end(this) - 1, 1, InsertBefore) {
+  Op<-1>() = Continue;
+#ifndef NDEBUG
+  AssertOK();
+#endif
+}
+// Join with Continue as its only operand; InsertAtEnd goes to the base.
+JoinInst::JoinInst(BasicBlock *Continue, BasicBlock *InsertAtEnd)
+  : TerminatorInst(Type::getVoidTy(Continue->getContext()), Instruction::Join,
+                   OperandTraits<JoinInst>::op_end(this) - 1, 1, InsertAtEnd) {
+  Op<-1>() = Continue;
+#ifndef NDEBUG
+  AssertOK();
+#endif
+}
+
+JoinInst::JoinInst(const JoinInst &JI)
+  : TerminatorInst(Type::getVoidTy(JI.getContext()), Instruction::Join,
+                   OperandTraits<JoinInst>::op_end(this) - JI.getNumOperands(),
+                   JI.getNumOperands()) {
+  assert(JI.getNumOperands() == 1 && "Join must have 1 operand!");
+  Op<-1>() = JI.Op<-1>(); // copy the single operand, checked above
+  SubclassOptionalData = JI.SubclassOptionalData;
+}
+
+unsigned JoinInst::getNumSuccessorsV() const {
+  return getNumSuccessors(); // plain forward to getNumSuccessors()
+}
+
+BasicBlock *JoinInst::getSuccessorV(unsigned idx) const {
+  return getSuccessor(idx); // plain forward; idx is not range-checked here
+}
+
+void JoinInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+  assert(idx == 0 && "Join has exactly one successor!");
+  setSuccessor(idx, B); // slot 0 is the only index that passes the assert
+}
+
+//===----------------------------------------------------------------------===//
 //                        BranchInst Implementation
 //===----------------------------------------------------------------------===//
 
@@ -3995,3 +4147,15 @@
   LLVMContext &Context = getContext();
   return new UnreachableInst(Context);
 }
+
+ForkInst *ForkInst::cloneImpl() const {
+  return new(getNumOperands()) ForkInst(*this); // copy-constructed clone
+}
+
+HaltInst *HaltInst::cloneImpl() const {
+  return new(getNumOperands()) HaltInst(*this); // copy-constructed clone
+}
+
+JoinInst *JoinInst::cloneImpl() const {
+  return new(getNumOperands()) JoinInst(*this); // copy-constructed clone
+}
Index: test/PIR/basic.ll
===================================================================
--- /dev/null
+++ test/PIR/basic.ll
@@ -0,0 +1,22 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+declare void @foo();
+
+define i32 @test() {                          ; fork/halt/join round trip through llvm-as | llvm-dis
+entry:                                        ; NOTE(review): expected fork text below has no second "label" - confirm against llvm-dis output
+  ; CHECK: fork label %forked, %cont
+  fork label %forked, label %cont             ; two successor labels
+
+forked:                                       ; preds = %entry
+  call void @foo()
+  ; CHECK: halt label %cont
+  halt label %cont                            ; single label; here the fork's second successor
+
+cont:                                         ; preds = %entry, %forked
+  call void @foo()
+  ; CHECK: join label %join
+  join label %join                            ; single successor label
+
+join:                                         ; preds = %cont
+  ret i32 0
+}
Index: test/PIR/invalid_fork.ll
===================================================================
--- /dev/null
+++ test/PIR/invalid_fork.ll
@@ -0,0 +1,19 @@
+; RUN: not llvm-as < %s
+
+declare void @foo();
+
+define i32 @test() {                          ; negative test: the RUN line expects llvm-as to fail
+entry:
+  fork label %forked                          ; malformed on purpose: only one label operand
+
+forked:                                       ; preds = %entry
+  call void @foo()
+  halt label %cont
+
+cont:                                         ; preds = %entry, %forked
+  call void @foo()
+  join label %join
+
+join:                                         ; preds = %cont
+  ret i32 0
+}
Index: unittests/IR/InstructionsTest.cpp
===================================================================
--- unittests/IR/InstructionsTest.cpp
+++ unittests/IR/InstructionsTest.cpp
@@ -168,6 +168,44 @@
   delete bb1;
 }
 
+// Checks successor accessors, swapSuccessors() and classof() on a detached fork.
+TEST(InstructionsTest, ForkInst) {
+  LLVMContext C;
+
+  // The two blocks the fork will target.
+  BasicBlock *Forked = BasicBlock::Create(C);
+  BasicBlock *Continue = BasicBlock::Create(C);
+
+  // Create the fork without inserting it into any block.
+  ForkInst *Fork = ForkInst::Create(Forked, Continue);
+
+  EXPECT_EQ(Forked, Fork->getSuccessor(0));
+  EXPECT_EQ(Forked, Fork->getForked());
+  EXPECT_EQ(Continue, Fork->getSuccessor(1));
+  EXPECT_EQ(Continue, Fork->getContinue());
+
+  // After swapping, each accessor must report the other block.
+  Fork->swapSuccessors();
+
+  EXPECT_EQ(Forked, Fork->getSuccessor(1));
+  EXPECT_EQ(Forked, Fork->getContinue());
+  EXPECT_EQ(Continue, Fork->getSuccessor(0));
+  EXPECT_EQ(Continue, Fork->getForked());
+
+  EXPECT_NE(Forked, Fork->getSuccessor(0));
+  EXPECT_NE(Forked, Fork->getForked());
+  EXPECT_NE(Continue, Fork->getSuccessor(1));
+  EXPECT_NE(Continue, Fork->getContinue());
+
+  // The instruction must be recognised as a ForkInst.
+  EXPECT_TRUE(ForkInst::classof(Fork));
+
+  // Delete the fork first so no uses of the blocks remain.
+  delete Fork;
+  delete Forked;
+  delete Continue;
+}
+
 TEST(InstructionsTest, CastInst) {
   LLVMContext C;