Index: llvm/trunk/lib/Target/Sparc/LeonFeatures.td
===================================================================
--- llvm/trunk/lib/Target/Sparc/LeonFeatures.td
+++ llvm/trunk/lib/Target/Sparc/LeonFeatures.td
@@ -78,14 +78,17 @@
                        "every single-cycle load instruction when the next "
                        "instruction is another load/store instruction">;
 
-def FlushCacheLineSWAP
-    : SubtargetFeature<"flshcachelineswap", "FlushCacheLineSWAP", "true",
-                       "LEON3 erratum fix: Flush cache line containing the "
-                       "lock before performing any of the atomic instructions "
-                       "SWAP and LDSTUB">;
-
 def InsertNOPsLoadStore
     : SubtargetFeature<"insertnopsloadstore", "InsertNOPsLoadStore", "true",
                        "LEON3 erratum fix: Insert NOPs between "
                        "single-precision loads and the store, so the number of "
                        "instructions between is 4">;
+
+def FillDataCache : SubtargetFeature<"filldatacache", "FillDataCache", "true",
+                                     "LEON2 erratum fix: Ensure data cache is "
+                                     "filled so that cache misses do not "
+                                     "happen later in program execution.">;
+
+def RestoreExecAddress
+    : SubtargetFeature<"restexecaddr", "RestoreExecAddress", "true",
+                       "LEON2 erratum fix: Restore execution address.">;
Index: llvm/trunk/lib/Target/Sparc/LeonPasses.h
===================================================================
--- llvm/trunk/lib/Target/Sparc/LeonPasses.h
+++ llvm/trunk/lib/Target/Sparc/LeonPasses.h
@@ -53,7 +53,7 @@
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   const char *getPassName() const override {
-    return "ReplaceSDIV: Erratum Fix LBR25:  do not emit SDIV, but emit SDIVCC "
+    return "ReplaceSDIV: Leon erratum fix:  do not emit SDIV, but emit SDIVCC "
            "instead";
   }
 };
@@ -66,11 +66,24 @@
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   const char *getPassName() const override {
-    return "FixCALL: Erratum Fix LBR26: restrict the size of the immediate "
+    return "FixCALL: Leon erratum fix: restrict the size of the immediate "
            "operand of the CALL instruction to 20 bits";
   }
 };
 
+class LLVM_LIBRARY_VISIBILITY RestoreExecAddress : public LEONMachineFunctionPass {
+public:
+  static char ID; // Passed to the LEONMachineFunctionPass constructor.
+
+  RestoreExecAddress(TargetMachine &tm);
+  bool runOnMachineFunction(MachineFunction& MF) override;
+  // Returns a fixed, human-readable description of this pass.
+  const char *getPassName() const override {
+    return "RestoreExecAddress: Leon erratum fix: ensure execution "
+           "address is restored for bad floating point trap handlers.";
+  }
+};
+
 class LLVM_LIBRARY_VISIBILITY IgnoreZeroFlag : public LEONMachineFunctionPass {
 public:
   static char ID;
@@ -79,11 +92,24 @@
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   const char *getPassName() const override {
-    return "IgnoreZeroFlag: Erratum Fix LBR28: do not rely on the zero bit "
+    return "IgnoreZeroFlag: Leon erratum fix: do not rely on the zero bit "
            "flag on a divide overflow for SDIVCC and UDIVCC";
   }
 };
 
+class LLVM_LIBRARY_VISIBILITY FillDataCache : public LEONMachineFunctionPass {
+public:
+  static char ID; // Passed to the LEONMachineFunctionPass constructor.
+  static bool CacheFilled; // True once the fill loop has been emitted.
+  // NOTE(review): CacheFilled is static and never reset in the visible code.
+  FillDataCache(TargetMachine &tm);
+  bool runOnMachineFunction(MachineFunction& MF) override;
+  // Returns a fixed, human-readable description of this pass.
+  const char *getPassName() const override {
+    return "FillDataCache: Leon erratum fix: fill data cache with values at application startup";
+  }
+};
+
 class LLVM_LIBRARY_VISIBILITY InsertNOPDoublePrecision
     : public LEONMachineFunctionPass {
 public:
@@ -93,7 +119,7 @@
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   const char *getPassName() const override {
-    return "InsertNOPDoublePrecision: Erratum Fix LBR30: insert a NOP before "
+    return "InsertNOPDoublePrecision: Leon erratum fix: insert a NOP before "
            "the double precision floating point instruction";
   }
 };
@@ -106,7 +132,7 @@
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   const char *getPassName() const override {
-    return "FixFSMULD: Erratum Fix LBR31: do not select FSMULD";
+    return "FixFSMULD: Leon erratum fix: do not utilize FSMULD";
   }
 };
 
@@ -118,7 +144,7 @@
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   const char *getPassName() const override {
-    return "ReplaceFMULS: Erratum Fix LBR32: replace FMULS instruction with a "
+    return "ReplaceFMULS: Leon erratum fix: Replace FMULS instruction with a "
            "routine using conversions/double precision operations to replace "
            "FMULS";
   }
@@ -133,7 +159,7 @@
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   const char *getPassName() const override {
-    return "PreventRoundChange: Erratum Fix LBR33: prevent any rounding mode "
+    return "PreventRoundChange: Leon erratum fix: prevent any rounding mode "
            "change request: use only the round-to-nearest rounding mode";
   }
 };
@@ -146,7 +172,7 @@
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   const char *getPassName() const override {
-    return "FixAllFDIVSQRT: Erratum Fix LBR34: fix FDIVS/FDIVD/FSQRTS/FSQRTD "
+    return "FixAllFDIVSQRT: Leon erratum fix: Fix FDIVS/FDIVD/FSQRTS/FSQRTD "
            "instructions with NOPs and floating-point store";
   }
 };
@@ -159,27 +185,12 @@
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   const char *getPassName() const override {
-    return "InsertNOPLoad: insert a NOP instruction after "
+    return "InsertNOPLoad: Leon erratum fix: Insert a NOP instruction after "
            "every single-cycle load instruction when the next instruction is "
            "another load/store instruction";
   }
 };
 
-class LLVM_LIBRARY_VISIBILITY FlushCacheLineSWAP
-    : public LEONMachineFunctionPass {
-public:
-  static char ID;
-
-  FlushCacheLineSWAP(TargetMachine &tm);
-  bool runOnMachineFunction(MachineFunction &MF) override;
-
-  const char *getPassName() const override {
-    return "FlushCacheLineSWAP: Erratum Fix LBR36: flush cache line containing "
-           "the lock before performing any of the atomic instructions SWAP and "
-           "LDSTUB";
-  }
-};
-
 class LLVM_LIBRARY_VISIBILITY InsertNOPsLoadStore
     : public LEONMachineFunctionPass {
 public:
@@ -189,7 +200,7 @@
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   const char *getPassName() const override {
-    return "InsertNOPsLoadStore: Erratum Fix LBR37: insert NOPs between "
+    return "InsertNOPsLoadStore: Leon Erratum Fix: Insert NOPs between "
            "single-precision loads and the store, so the number of "
            "instructions between is 4";
   }
Index: llvm/trunk/lib/Target/Sparc/LeonPasses.cpp
===================================================================
--- llvm/trunk/lib/Target/Sparc/LeonPasses.cpp
+++ llvm/trunk/lib/Target/Sparc/LeonPasses.cpp
@@ -16,6 +16,7 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -89,15 +90,6 @@
         MachineBasicBlock::iterator NMBBI = std::next(MBBI);
         BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
         Modified = true;
-      } else if (MI.isInlineAsm()) {
-        // Look for an inline ld or ldf instruction.
-        StringRef AsmString =
-            MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
-        if (AsmString.startswith_lower("ld")) {
-          MachineBasicBlock::iterator NMBBI = std::next(MBBI);
-          BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
-          Modified = true;
-        }
       }
     }
   }
@@ -147,29 +139,6 @@
         Reg1Index = MI.getOperand(0).getReg();
         Reg2Index = MI.getOperand(1).getReg();
         Reg3Index = MI.getOperand(2).getReg();
-      } else if (MI.isInlineAsm()) {
-        StringRef AsmString =
-            MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
-        if (AsmString.startswith_lower("fsmuld")) {
-          // this is an inline FSMULD instruction
-
-          unsigned StartOp = InlineAsm::MIOp_FirstOperand;
-
-          // extracts the registers from the inline assembly instruction
-          for (unsigned i = StartOp, e = MI.getNumOperands(); i != e; ++i) {
-            const MachineOperand &MO = MI.getOperand(i);
-            if (MO.isReg()) {
-              if (Reg1Index == UNASSIGNED_INDEX)
-                Reg1Index = MO.getReg();
-              else if (Reg2Index == UNASSIGNED_INDEX)
-                Reg2Index = MO.getReg();
-              else if (Reg3Index == UNASSIGNED_INDEX)
-                Reg3Index = MO.getReg();
-            }
-            if (Reg3Index != UNASSIGNED_INDEX)
-              break;
-          }
-        }
       }
 
       if (Reg1Index != UNASSIGNED_INDEX && Reg2Index != UNASSIGNED_INDEX &&
@@ -259,28 +228,6 @@
         Reg1Index = MI.getOperand(0).getReg();
         Reg2Index = MI.getOperand(1).getReg();
         Reg3Index = MI.getOperand(2).getReg();
-      } else if (MI.isInlineAsm()) {
-        StringRef AsmString =
-            MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
-        if (AsmString.startswith_lower("fmuls")) {
-          // this is an inline FMULS instruction
-          unsigned StartOp = InlineAsm::MIOp_FirstOperand;
-
-          // extracts the registers from the inline assembly instruction
-          for (unsigned i = StartOp, e = MI.getNumOperands(); i != e; ++i) {
-            const MachineOperand &MO = MI.getOperand(i);
-            if (MO.isReg()) {
-              if (Reg1Index == UNASSIGNED_INDEX)
-                Reg1Index = MO.getReg();
-              else if (Reg2Index == UNASSIGNED_INDEX)
-                Reg2Index = MO.getReg();
-              else if (Reg3Index == UNASSIGNED_INDEX)
-                Reg3Index = MO.getReg();
-            }
-            if (Reg3Index != UNASSIGNED_INDEX)
-              break;
-          }
-        }
       }
 
       if (Reg1Index != UNASSIGNED_INDEX && Reg2Index != UNASSIGNED_INDEX &&
@@ -362,18 +309,6 @@
       MachineInstr &MI = *MBBI;
       unsigned Opcode = MI.getOpcode();
 
-      if (MI.isInlineAsm()) {
-        StringRef AsmString =
-            MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
-        if (AsmString.startswith_lower("fsqrtd")) {
-          // this is an inline fsqrts instruction
-          Opcode = SP::FSQRTD;
-        } else if (AsmString.startswith_lower("fdivd")) {
-          // this is an inline fsqrts instruction
-          Opcode = SP::FDIVD;
-        }
-      }
-
       // Note: FDIVS and FSQRTS cannot be generated when this erratum fix is
       // switched on so we don't need to check for them here. They will
       // already have been converted to FSQRTD or FDIVD earlier in the
@@ -453,8 +388,6 @@
     MachineBasicBlock &MBB = *MFI;
     for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
       MachineInstr &MI = *MBBI;
-      MI.print(errs());
-      errs() << "\n";
 
       unsigned Opcode = MI.getOpcode();
       if (Opcode == SP::CALL || Opcode == SP::CALLrr) {
@@ -469,24 +402,6 @@
             break;
           }
         }
-      } else if (MI.isInlineAsm()) // inline assembly immediate call
-      {
-        StringRef AsmString =
-            MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
-        if (AsmString.startswith_lower("call")) {
-          // this is an inline call instruction
-          unsigned StartOp = InlineAsm::MIOp_FirstOperand;
-
-          // extracts the registers from the inline assembly instruction
-          for (unsigned i = StartOp, e = MI.getNumOperands(); i != e; ++i) {
-            MachineOperand &MO = MI.getOperand(i);
-            if (MO.isImm()) {
-              int64_t Value = MO.getImm();
-              MO.setImm(Value & 0x000fffffL);
-              Modified = true;
-            }
-          }
-        }
       }
     }
   }
@@ -562,55 +477,6 @@
         BuildMI(MBB, NextMBBI, DL, TII.get(SP::NOP));
 
         Modified = true;
-      } else if (MI.isInlineAsm()) {
-        StringRef AsmString =
-            MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
-        if (AsmString.startswith_lower("sdivcc") ||
-            AsmString.startswith_lower("udivcc")) {
-          // this is an inline SDIVCC or UDIVCC instruction
-
-          // split the current machine basic block - just after the
-          // sdivcc/udivcc instruction
-          // create a label that help us skip the zero flag update (of PSR -
-          // Processor Status Register)
-          // if conditions are not met
-          const BasicBlock *LLVM_BB = MBB.getBasicBlock();
-          MachineFunction::iterator It =
-              std::next(MachineFunction::iterator(MBB));
-
-          MachineBasicBlock *dneBB = MF.CreateMachineBasicBlock(LLVM_BB);
-          MF.insert(It, dneBB);
-
-          // Transfer the remainder of MBB and its successor edges to dneBB.
-          dneBB->splice(dneBB->begin(), &MBB,
-                        std::next(MachineBasicBlock::iterator(MI)), MBB.end());
-          dneBB->transferSuccessorsAndUpdatePHIs(&MBB);
-
-          MBB.addSuccessor(dneBB);
-
-          MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
-
-          // bvc - branch if overflow flag not set
-          BuildMI(MBB, NextMBBI, DL, TII.get(SP::BCOND))
-              .addMBB(dneBB)
-              .addImm(SPCC::ICC_VS);
-
-          // bnz - branch if not zero
-          BuildMI(MBB, NextMBBI, DL, TII.get(SP::BCOND))
-              .addMBB(dneBB)
-              .addImm(SPCC::ICC_NE);
-
-          // use the WRPSR (Write Processor State Register) instruction to set
-          // the zeo flag to 1
-          // create wr %g0, 1, %psr
-          BuildMI(MBB, NextMBBI, DL, TII.get(SP::WRPSRri))
-              .addReg(SP::G0)
-              .addImm(1);
-
-          BuildMI(MBB, NextMBBI, DL, TII.get(SP::NOP));
-
-          Modified = true;
-        }
       }
     }
   }
@@ -652,7 +518,6 @@
         MachineInstr &NMI = *NMBBI;
 
         unsigned NextOpcode = NMI.getOpcode();
-        // NMI.print(errs());
         if (NextOpcode == SP::FADDD || NextOpcode == SP::FSUBD ||
             NextOpcode == SP::FMULD || NextOpcode == SP::FDIVD) {
           int RegAIndex = GetRegIndexForOperand(MI, 0);
@@ -728,6 +593,12 @@
           StringRef FuncName = MO.getGlobal()->getName();
           if (FuncName.compare_lower("fesetround") == 0) {
             MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+            emitOptimizationRemark(
+                MF.getFunction()->getContext(), getPassName(), *MF.getFunction(),
+                MI.getDebugLoc(), "Warning: You are using the prvntroundchange "
+                                  "option to prevent rounding changes caused "
+                                  "by LEON errata. A call to fesetround to be "
+                                  "removed from the output.");
             MI.eraseFromParent();
             MBBI = NMBBI;
             Modified = true;
@@ -740,62 +611,6 @@
   return Modified;
 }
 //*****************************************************************************
-//**** FlushCacheLineSWAP pass
-//*****************************************************************************
-// This pass inserts FLUSHW just before any SWAP atomic instruction.
-//
-char FlushCacheLineSWAP::ID = 0;
-
-FlushCacheLineSWAP::FlushCacheLineSWAP(TargetMachine &tm)
-    : LEONMachineFunctionPass(tm, ID) {}
-
-bool FlushCacheLineSWAP::runOnMachineFunction(MachineFunction &MF) {
-  Subtarget = &MF.getSubtarget<SparcSubtarget>();
-  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
-  DebugLoc DL = DebugLoc();
-
-  bool Modified = false;
-  for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
-    MachineBasicBlock &MBB = *MFI;
-    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
-      MachineInstr &MI = *MBBI;
-      unsigned Opcode = MI.getOpcode();
-      if (Opcode == SP::SWAPrr || Opcode == SP::SWAPri ||
-          Opcode == SP::LDSTUBrr || Opcode == SP::LDSTUBri) {
-        // insert flush and 5 NOPs before the swap/ldstub instruction
-        BuildMI(MBB, MBBI, DL, TII.get(SP::FLUSH));
-        BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
-        BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
-        BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
-        BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
-        BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
-
-        Modified = true;
-      } else if (MI.isInlineAsm()) {
-        StringRef AsmString =
-            MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
-        if (AsmString.startswith_lower("swap") ||
-            AsmString.startswith_lower("ldstub")) {
-          // this is an inline swap or ldstub instruction
-
-          // insert flush and 5 NOPs before the swap/ldstub instruction
-          BuildMI(MBB, MBBI, DL, TII.get(SP::FLUSH));
-          BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
-          BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
-          BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
-          BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
-          BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
-
-          Modified = true;
-        }
-      }
-    }
-  }
-
-  return Modified;
-}
-
-//*****************************************************************************
 //**** InsertNOPsLoadStore pass
 //*****************************************************************************
 // This pass shall insert NOPs between floating point loads and stores when the
@@ -930,3 +745,189 @@
 
   return Modified;
 }
+
+
+//****************************************************************************************************************
+//**** FillDataCache pass
+//****************************************************************************************************************
+// This erratum fix inserts, shortly after the first instruction of the first
+// function it processes, a loop that executes NOP instructions 4096 times.
+//
+// or %g0, 1, %g1
+// loop1:
+// nop
+// add %g1, 1, %g1
+// cmp %g1, 4096
+// ble loop1
+
+char FillDataCache::ID = 0;
+bool FillDataCache::CacheFilled = false; // no fill loop emitted yet
+// Forwards the target machine and the pass ID to the LEON base pass.
+FillDataCache::FillDataCache(TargetMachine &tm)
+    : LEONMachineFunctionPass(tm, ID) {}
+
+bool FillDataCache::runOnMachineFunction(MachineFunction &MF) {
+  Subtarget = &MF.getSubtarget<SparcSubtarget>();
+  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+  DebugLoc DL = DebugLoc();
+
+  // Instructions visited so far, counted across all blocks of MF.
+  unsigned int CountInstr = 0;
+  bool Modified = false;
+  // CacheFilled is static: once it is set, later calls insert nothing.
+  if (CacheFilled)
+    return Modified;
+
+  for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
+    // The loop was emitted while visiting the previous block; stop here.
+    if (CacheFilled)
+      break;
+
+    MachineBasicBlock &MBB = *MFI;
+    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
+      MachineInstr &MI = *MBBI;
+      CountInstr++;
+      // New code is placed directly after MI. NOTE(review): MBBI is advanced
+      // here and again by the loop header, so every other instruction is
+      // skipped; kept as is because it decides where the loop lands.
+      MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
+      MBBI = NextMBBI;
+
+      // Emitted sequence:
+      //   or %g0, 1, %g1      (after the first instruction visited)
+      // loop:                 (after the second instruction visited)
+      //   nop
+      //   add %g1, 1, %g1
+      //   cmp %g1, 4096
+      //   ble loop
+      // NOTE(review): 4096 is outside the signed 13-bit immediate range
+      // (-4096..4095) of the SPARC format-3 instructions - confirm.
+      if (CountInstr == 1) {
+        // ORri rather than ORrr: the second source operand is an immediate.
+        BuildMI(MBB, NextMBBI, DL, TII.get(SP::ORri), SP::G1)
+            .addReg(SP::G0)
+            .addImm(1);
+        Modified = true;
+        // MBBI may now be MBB.end(); the loop header must not increment it.
+        if (MBBI == E)
+          break;
+      } else {
+        const BasicBlock *LLVM_BB = MBB.getBasicBlock();
+        MachineBasicBlock *dneBB = MF.CreateMachineBasicBlock(LLVM_BB);
+        MF.insert(std::next(MachineFunction::iterator(MBB)), dneBB);
+
+        BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
+        BuildMI(MBB, MBBI, DL, TII.get(SP::ADDri), SP::G1)
+            .addReg(SP::G1)
+            .addImm(1);
+        BuildMI(MBB, MBBI, DL, TII.get(SP::CMPri)).addReg(SP::G1).addImm(4096);
+        BuildMI(MBB, MBBI, DL, TII.get(SP::BCOND))
+            .addMBB(dneBB)
+            .addImm(SPCC::ICC_LE);
+
+        // Everything after MI, beginning with the NOP built above, moves to
+        // dneBB, so the branch targets the head of the block it ends up in.
+        dneBB->splice(dneBB->begin(), &MBB,
+                      std::next(MachineBasicBlock::iterator(MI)), MBB.end());
+        dneBB->transferSuccessorsAndUpdatePHIs(&MBB);
+        MBB.addSuccessor(dneBB);
+        // Record that back edge in the CFG as well.
+        dneBB->addSuccessor(dneBB);
+        CacheFilled = true;
+        Modified = true;
+        break;
+      }
+    }
+  }
+
+  return Modified;
+}
+
+
+//****************************************************************************************************************
+//**** RestoreExecAddress pass
+//****************************************************************************************************************
+// This erratum fix should handle user traps of FPU exceptions and restore the
+// execution address by skipping the trapped FPU instruction.
+// The algorithm:
+// find rett - return from trap
+// insert code before rett to:
+// 1. load the FSR register
+// 2. check if there is an FPU exception
+// 3. branch to old rett if there is no exception
+// 4. rett to a restored exec address
+char RestoreExecAddress::ID = 0; // handed to the base class below
+
+RestoreExecAddress::RestoreExecAddress(TargetMachine &tm)
+    : LEONMachineFunctionPass(tm, ID) {}
+
+bool RestoreExecAddress::runOnMachineFunction(MachineFunction &MF) {
+  Subtarget = &MF.getSubtarget<SparcSubtarget>();
+  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+  DebugLoc DL = DebugLoc();
+
+  bool Modified = false;
+  for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
+    MachineBasicBlock &MBB = *MFI;
+    // At most one RETT is rewritten per block.
+    bool ExecAddressRestored = false;
+    for (auto NMBBI = MBB.begin(), E = MBB.end(); NMBBI != E; ++NMBBI) {
+      // The candidate is the instruction after NMBBI, so NMBBI itself stays
+      // in MBB when the tail is spliced away below. When NMBBI is the last
+      // instruction there is no candidate; end() must not be dereferenced.
+      MachineBasicBlock::iterator MBBI = std::next(NMBBI);
+      if (MBBI == E || ExecAddressRestored)
+        continue;
+      MachineInstr &MI = *MBBI;
+      unsigned Opcode = MI.getOpcode();
+      if (Opcode != SP::RETTrr && Opcode != SP::RETTri)
+        continue;
+
+      const BasicBlock *LLVM_BB = MBB.getBasicBlock();
+      MachineBasicBlock *dneBB = MF.CreateMachineBasicBlock(LLVM_BB);
+
+      // Get the FSR (floating-point state register) into %l7; its lowest
+      // bits are *cexc*, the current exception flags.
+      // NOTE(review): the rr opcodes are given an immediate operand here, and
+      // %l7 is used as the store address without being set in this pass.
+      BuildMI(MBB, MBBI, DL, TII.get(SP::STFSRrr)).addReg(SP::L7).addImm(0);
+      BuildMI(MBB, MBBI, DL, TII.get(SP::LDrr))
+          .addReg(SP::L7)
+          .addReg(SP::L7)
+          .addImm(0);
+
+      // Mask with 0b1111 and compare with zero; a non-zero result is treated
+      // as an FPU exception.
+      // NOTE(review): SPARC V8 defines cexc as FSR bits 4:0 (five flags);
+      // this mask covers bits 3:0 only - confirm.
+      BuildMI(MBB, MBBI, DL, TII.get(SP::ANDri))
+          .addReg(SP::L7)
+          .addReg(SP::L7)
+          .addImm(15);
+      BuildMI(MBB, MBBI, DL, TII.get(SP::CMPri)).addReg(SP::L7).addImm(0);
+
+      // No exception: branch to dneBB, which receives the original RETT.
+      // Otherwise fall through to a new RETT built from %l2 and 4.
+      BuildMI(MBB, MBBI, DL, TII.get(SP::BCOND))
+          .addMBB(dneBB)
+          .addImm(SPCC::ICC_E);
+      BuildMI(MBB, MBBI, DL, TII.get(SP::RETTri)).addReg(SP::L2).addImm(4);
+
+      MF.insert(std::next(MachineFunction::iterator(MBB)), dneBB);
+
+      // Transfer the remainder of MBB and its successor edges to dneBB.
+      dneBB->splice(dneBB->begin(), &MBB, MachineBasicBlock::iterator(MI),
+                    MBB.end());
+      dneBB->transferSuccessorsAndUpdatePHIs(&MBB);
+      MBB.addSuccessor(dneBB);
+
+      // The outer loop visits dneBB next. Its leading RETT is not rewritten
+      // again because the first instruction of a block is never a candidate.
+      ExecAddressRestored = true;
+      Modified = true;
+    }
+  }
+
+  return Modified;
+}
+
Index: llvm/trunk/lib/Target/Sparc/Sparc.td
===================================================================
--- llvm/trunk/lib/Target/Sparc/Sparc.td
+++ llvm/trunk/lib/Target/Sparc/Sparc.td
@@ -107,7 +107,8 @@
 // AT697E: Provides full coverage of AT697E - covers all the erratum fixes for
 // LEON2 AT697E
 def : Processor<"at697e", LEON2Itineraries, [
-  FeatureLeon, ReplaceSDIV, FixCALL, IgnoreZeroFlag, InsertNOPDoublePrecision
+  FeatureLeon, ReplaceSDIV, FixCALL, IgnoreZeroFlag, InsertNOPDoublePrecision,
+  FillDataCache, RestoreExecAddress
 ]>;
 
 // LEON 2 FT (AT697F)
@@ -124,7 +125,7 @@
 // instruction.
 def : Processor<"ut699", LEON3Itineraries, [
   FeatureLeon, FixFSMULD, ReplaceFMULS, PreventRoundChange,
-  FixAllFDIVSQRT, InsertNOPLoad, FlushCacheLineSWAP, InsertNOPsLoadStore
+  FixAllFDIVSQRT, InsertNOPLoad, InsertNOPsLoadStore
 ]>;
 
 // LEON3 FT (GR712RC). Provides features for the GR712RC processor.
Index: llvm/trunk/lib/Target/Sparc/SparcSubtarget.h
===================================================================
--- llvm/trunk/lib/Target/Sparc/SparcSubtarget.h
+++ llvm/trunk/lib/Target/Sparc/SparcSubtarget.h
@@ -54,8 +54,9 @@
   bool IgnoreZeroFlag;
   bool InsertNOPDoublePrecision;
   bool PreventRoundChange;
-  bool FlushCacheLineSWAP;
   bool InsertNOPsLoadStore;
+  bool FillDataCache;
+  bool RestoreExecAddress;
 
   SparcInstrInfo InstrInfo;
   SparcTargetLowering TLInfo;
@@ -104,9 +105,10 @@
   bool replaceFMULS() const { return ReplaceFMULS; }
   bool preventRoundChange() const { return PreventRoundChange; }
   bool fixAllFDIVSQRT() const { return FixAllFDIVSQRT; }
-  bool flushCacheLineSWAP() const { return FlushCacheLineSWAP; }
   bool insertNOPsLoadStore() const { return InsertNOPsLoadStore; }
   bool insertNOPLoad() const { return InsertNOPLoad; }
+  bool fillDataCache() const { return FillDataCache; }
+  bool restoreExecAddr() const { return RestoreExecAddress; }
 
   /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.
Index: llvm/trunk/lib/Target/Sparc/SparcSubtarget.cpp
===================================================================
--- llvm/trunk/lib/Target/Sparc/SparcSubtarget.cpp
+++ llvm/trunk/lib/Target/Sparc/SparcSubtarget.cpp
@@ -48,7 +48,6 @@
   PreventRoundChange = false;
   FixAllFDIVSQRT = false;
   InsertNOPLoad = false;
-  FlushCacheLineSWAP = false;
   InsertNOPsLoadStore = false;
 
   // Determine default and user specified characteristics
Index: llvm/trunk/lib/Target/Sparc/SparcTargetMachine.cpp
===================================================================
--- llvm/trunk/lib/Target/Sparc/SparcTargetMachine.cpp
+++ llvm/trunk/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -171,8 +171,11 @@
   if (this->getSparcTargetMachine().getSubtargetImpl()->insertNOPLoad()) {
     addPass(new InsertNOPLoad(getSparcTargetMachine()));
   }
-  if (this->getSparcTargetMachine().getSubtargetImpl()->flushCacheLineSWAP()) {
-    addPass(new FlushCacheLineSWAP(getSparcTargetMachine()));
+  if (this->getSparcTargetMachine().getSubtargetImpl()->fillDataCache()) {
+    addPass(new FillDataCache(getSparcTargetMachine()));
+  }
+  if (this->getSparcTargetMachine().getSubtargetImpl()->restoreExecAddr()) {
+    addPass(new RestoreExecAddress(getSparcTargetMachine()));
   }
   if (this->getSparcTargetMachine()
           .getSubtargetImpl()
Index: llvm/trunk/test/CodeGen/SPARC/LeonFillDataCachePassUT.ll
===================================================================
--- llvm/trunk/test/CodeGen/SPARC/LeonFillDataCachePassUT.ll
+++ llvm/trunk/test/CodeGen/SPARC/LeonFillDataCachePassUT.ll
@@ -0,0 +1,27 @@
+; RUN: llc %s -O0 -march=sparc -mcpu=leon2 -mattr=+filldatacache -o - | FileCheck %s
+; RUN: llc %s -O0 -march=sparc -mcpu=at697e -o - | FileCheck %s
+; RUN: llc %s -O0 -march=sparc -mcpu=at697f -mattr=+filldatacache -o - | FileCheck %s
+; The first function llc compiles receives the data cache fill sequence.
+; CHECK-LABEL: test_filldatacache_1
+; CHECK:       or %g0, 1, %g1
+; CHECK:       nop
+; CHECK-NEXT:  add %g1, 1, %g1
+; CHECK-NEXT:  cmp %g1, 4096
+; CHECK-NEXT:  ble {{.+}}
+define zeroext i1@test_filldatacache_1(i1 zeroext %a, i1 zeroext %b) {
+  %1 = tail call zeroext i1 asm sideeffect "udivcc $0, $1, $2", "=r,r,r"(i1 zeroext %a, i1 zeroext %b)
+
+  ret i1 %1
+}
+
+; The pass emits the fill sequence only once, so it must be absent here.
+; CHECK-LABEL: test_filldatacache_2
+; CHECK-NOT:   or %g0, 1, %g1
+; CHECK-NOT:   add %g1, 1, %g1
+; CHECK-NOT:   cmp %g1, 4096
+; CHECK-NOT:   ble {{.+}}
+define zeroext i1@test_filldatacache_2(i1 zeroext %a, i1 zeroext %b) {
+  %1 = tail call zeroext i1 asm sideeffect "sdivcc $0, $1, $2", "=r,r,r"(i1 zeroext %a, i1 zeroext %b)
+
+  ret i1 %1
+}
Index: llvm/trunk/test/CodeGen/SPARC/LeonFixCALLPassUT.ll
===================================================================
--- llvm/trunk/test/CodeGen/SPARC/LeonFixCALLPassUT.ll
+++ llvm/trunk/test/CodeGen/SPARC/LeonFixCALLPassUT.ll
@@ -1,20 +0,0 @@
-; RUN: llc %s -O0 -march=sparc -mcpu=at697e -o - | FileCheck %s -check-prefix=FIXCALL
-; RUN: llc %s -O0 -march=sparc -mcpu=leon2 -mattr=+fixcall -o - | FileCheck %s -check-prefix=FIXCALL
-
-; RUN: llc %s -O0 -march=sparc -mcpu=at697e -mattr=-fixcall -o - | FileCheck %s -check-prefix=NO_FIXCALL
-; RUN: llc %s -O0 -march=sparc -mcpu=leon2  -o - | FileCheck %s -check-prefix=NO_FIXCALL
-
-
-; FIXCALL-LABEL:       	immediate_call_test
-; FIXCALL:       	call 763288
-
-; NO_FIXCALL-LABEL:     immediate_call_test
-; NO_FIXCALL:       	call 2047583640
-define void @immediate_call_test() nounwind {
-entry:
-        call void asm sideeffect "call $0", "i"(i32 2047583640) nounwind
-        ret void
-}
-
-
-
Index: llvm/trunk/test/CodeGen/SPARC/LeonFixFSMULDPassUT.ll
===================================================================
--- llvm/trunk/test/CodeGen/SPARC/LeonFixFSMULDPassUT.ll
+++ llvm/trunk/test/CodeGen/SPARC/LeonFixFSMULDPassUT.ll
@@ -1,31 +1,17 @@
-; RUN: llc %s -O0 -march=sparc -mcpu=ut699 -o - | FileCheck %s
+; RUN: llc %s -O0 -march=sparc -mattr=fixfsmuld -o - | FileCheck %s
+; RUN: llc %s -O0 -march=sparc -o - | FileCheck %s --check-prefix=NOFIX
 
 ; CHECK-LABEL: test_fix_fsmuld_1
-; CHECK:       fstod %f20, %f2
-; CHECK:       fstod %f21, %f3
-; CHECK:       fmuld %f2, %f3, %f8
-; CHECK:       fstod %f20, %f0
-define double @test_fix_fsmuld_1() {
+; CHECK:       fstod %f1, %f2
+; CHECK:       fstod %f0, %f4
+; CHECK:       fmuld %f2, %f4, %f0
+; NOFIX-LABEL: test_fix_fsmuld_1
+; NOFIX:       fsmuld %f1, %f0, %f0
+define double @test_fix_fsmuld_1(float %a, float %b) {
 entry:
-  %a = alloca float, align 4
-  %b = alloca float, align 4
-  store float 0x402ECCCCC0000000, float* %a, align 4
-  store float 0x4022333340000000, float* %b, align 4
-  %0 = load float, float* %b, align 4
-  %1 = load float, float* %a, align 4
-  %mul = tail call double asm sideeffect "fsmuld $0, $1, $2", "={f20},{f21},{f8}"(float* %a, float* %b)
-
-  ret double %mul
-}
-
-; CHECK-LABEL: test_fix_fsmuld_2
-; CHECK:       fstod %f20, %f2
-; CHECK:       fstod %f21, %f3
-; CHECK:       fmuld %f2, %f3, %f8
-; CHECK:       fstod %f20, %f0
-define double @test_fix_fsmuld_2(float* %a, float* %b) {
-entry:
-  %mul = tail call double asm sideeffect "fsmuld $0, $1, $2", "={f20},{f21},{f8}"(float* %a, float* %b)
+  %0 = fpext float %a to double
+  %1 = fpext float %b to double
+  %mul = fmul double %0, %1
 
   ret double %mul
 }
Index: llvm/trunk/test/CodeGen/SPARC/LeonInsertNOPLoadPassUT.ll
===================================================================
--- llvm/trunk/test/CodeGen/SPARC/LeonInsertNOPLoadPassUT.ll
+++ llvm/trunk/test/CodeGen/SPARC/LeonInsertNOPLoadPassUT.ll
@@ -19,25 +19,3 @@
   %res = load i32, i32* %p
   ret i32 %res
 }
-
-; CHECK-LABEL: ld_inlineasm_test_1
-; CHECK:       ld [%o0], %o0
-; CHECK-NEXT:  !NO_APP
-; CHECK-NEXT:  nop
-define float @ld_inlineasm_test_1(float* %a) {
-entry:
-  %res = tail call float asm sideeffect "ld [$1], $0", "=r,r"(float* %a)
-
-  ret float %res
-}
-
-; CHECK-LABEL: ld_inlineasm_test_2
-; CHECK:       ld [%o0], %o0
-; CHECK-NEXT:  !NO_APP
-; CHECK-NEXT:  nop
-define i32 @ld_inlineasm_test_2(i32* %a) {
-entry:
-  %res = tail call i32 asm sideeffect "ld [$1], $0", "=r,r"(i32* %a)
-
-  ret i32 %res
-}
\ No newline at end of file
Index: llvm/trunk/test/CodeGen/SPARC/LeonReplaceFMULSPassUT.ll
===================================================================
--- llvm/trunk/test/CodeGen/SPARC/LeonReplaceFMULSPassUT.ll
+++ llvm/trunk/test/CodeGen/SPARC/LeonReplaceFMULSPassUT.ll
@@ -1,19 +1,13 @@
-; RUN: llc %s -O0 -march=sparc -mcpu=ut699 -o - | FileCheck %s
+; RUN: llc %s -O0 -march=sparc -mattr=replacefmuls -o - | FileCheck %s
 
-; CHECK-LABEL: fmuls_fix_test
-; CHECK:       fstod %f20, %f2
-; CHECK:       fstod %f21, %f3
-; CHECK:       fmuld %f2, %f3, %f8
-; CHECK:       fstod %f20, %f0
-define double @fmuls_fix_test() {
+; CHECK-LABEL: test_replace_fmuls
+; CHECK:       fsmuld %f1, %f0, %f2
+; CHECK:       fdtos %f2, %f0
+; NOFIX-LABEL: test_replace_fmuls
+; NOFIX:       fmuls %f1, %f0, %f0
+define float @test_replace_fmuls(float %a, float %b) {
 entry:
-  %a = alloca float, align 4
-  %b = alloca float, align 4
-  store float 0x402ECCCCC0000000, float* %a, align 4
-  store float 0x4022333340000000, float* %b, align 4
-  %0 = load float, float* %b, align 4
-  %1 = load float, float* %a, align 4
-  %mul = tail call double asm sideeffect "fmuls $0, $1, $2", "={f20},{f21},{f8}"(float* %a, float* %b)
+  %mul = fmul float %a, %b
 
-  ret double %mul
+  ret float %mul
 }