llvm · Jul 5, 2018
diff --git a/‎llvm/lib/Target/Mips/CMakeLists.txt
+1 b/‎llvm/lib/Target/Mips/CMakeLists.txt
+1
diff --git a/‎llvm/lib/Target/Mips/Mips.h
+1 b/‎llvm/lib/Target/Mips/Mips.h
+1
diff --git a/‎llvm/lib/Target/Mips/Mips64InstrInfo.td
+11 b/‎llvm/lib/Target/Mips/Mips64InstrInfo.td
+11
diff --git a/‎llvm/lib/Target/Mips/MipsExpandPseudo.cpp
+702 b/‎llvm/lib/Target/Mips/MipsExpandPseudo.cpp
+702
diff --git a/‎llvm/lib/Target/Mips/MipsISelLowering.cpp
+276-329 b/‎llvm/lib/Target/Mips/MipsISelLowering.cpp
+276-329
diff --git a/‎llvm/lib/Target/Mips/MipsISelLowering.h
+4-8 b/‎llvm/lib/Target/Mips/MipsISelLowering.h
+4-8
diff --git a/‎llvm/lib/Target/Mips/MipsInstrInfo.td
+54 b/‎llvm/lib/Target/Mips/MipsInstrInfo.td
+54
diff --git a/‎llvm/lib/Target/Mips/MipsTargetMachine.cpp
+9 b/‎llvm/lib/Target/Mips/MipsTargetMachine.cpp
+9
diff --git a/‎llvm/test/CodeGen/Mips/atomic.ll
+7,515-406 b/‎llvm/test/CodeGen/Mips/atomic.ll
+7,515-406
diff --git a/‎llvm/test/CodeGen/Mips/atomic64.ll
+1,397 b/‎llvm/test/CodeGen/Mips/atomic64.ll
+1,397
diff --git a/‎llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll
+109-13 b/‎llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll
+109-13
diff --git a/‎llvm/test/CodeGen/Mips/micromips-atomic.ll
+16-9 b/‎llvm/test/CodeGen/Mips/micromips-atomic.ll
+16-9
@@ -30,6 +30,7 @@ add_llvm_target(MipsCodeGen
   MipsCCState.cpp
   MipsConstantIslandPass.cpp
   MipsDelaySlotFiller.cpp
+  MipsExpandPseudo.cpp
   MipsFastISel.cpp
   MipsInstrInfo.cpp
   MipsInstructionSelector.cpp
 
@@ -37,6 +37,7 @@ namespace llvm {
   FunctionPass *createMipsBranchExpansion();
   FunctionPass *createMipsConstantIslandPass();
   FunctionPass *createMicroMipsSizeReducePass();
+  FunctionPass *createMipsExpandPseudoPass();
 
   InstructionSelector *createMipsInstructionSelector(const MipsTargetMachine &,
                                                      MipsSubtarget &,
 
@@ -85,6 +85,17 @@ let usesCustomInserter = 1 in {
   def ATOMIC_CMP_SWAP_I64  : AtomicCmpSwap<atomic_cmp_swap_64, GPR64>;
 }
 
+def ATOMIC_LOAD_ADD_I64_POSTRA  : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_SUB_I64_POSTRA  : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_AND_I64_POSTRA  : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_OR_I64_POSTRA   : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_XOR_I64_POSTRA  : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_NAND_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+
+def ATOMIC_SWAP_I64_POSTRA      : Atomic2OpsPostRA<GPR64>;
+
+def ATOMIC_CMP_SWAP_I64_POSTRA  : AtomicCmpSwapPostRA<GPR64>;
+
 /// Pseudo instructions for loading and storing accumulator registers.
 let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
   def LOAD_ACC128  : Load<"", ACC128>;
 
@@ -679,17 +679,13 @@ class TargetRegisterClass;
                                                 unsigned Size, unsigned DstReg,
                                                 unsigned SrcRec) const;
 
-    MachineBasicBlock *emitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
-                                        unsigned Size, unsigned BinOpcode,
-                                        bool Nand = false) const;
+    MachineBasicBlock *emitAtomicBinary(MachineInstr &MI,
+                                        MachineBasicBlock *BB) const;
     MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr &MI,
                                                 MachineBasicBlock *BB,
-                                                unsigned Size,
-                                                unsigned BinOpcode,
-                                                bool Nand = false) const;
+                                                unsigned Size) const;
     MachineBasicBlock *emitAtomicCmpSwap(MachineInstr &MI,
-                                         MachineBasicBlock *BB,
-                                         unsigned Size) const;
+                                         MachineBasicBlock *BB) const;
     MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr &MI,
                                                  MachineBasicBlock *BB,
                                                  unsigned Size) const;
 
@@ -1852,11 +1852,37 @@ class Atomic2Ops<PatFrag Op, RegisterClass DRC> :
   PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$incr),
            [(set DRC:$dst, (Op iPTR:$ptr, DRC:$incr))]>;
 
+class Atomic2OpsPostRA<RegisterClass RC> :
+  PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr), []> {
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+// Post-RA pseudo for the sub-word (i8/i16) atomic read-modify-write and swap
+// operations. Besides the pointer and the operand it carries the two masks and
+// the shift amount as explicit register inputs. Like the other PostRA atomic
+// pseudos it has no selection pattern, so the memory-access flags cannot be
+// inferred and are set explicitly.
+class Atomic2OpsSubwordPostRA<RegisterClass RC> :
+  PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr, RC:$mask, RC:$mask2,
+                                RC:$shiftamnt), []> {
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
 // Atomic Compare & Swap.
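+// Atomic compare and swap is lowered in two stages. The first stage happens
+// during ISelLowering, which produces the PostRA version of this instruction.
+// The second stage happens after register allocation, when the
+// MipsExpandPseudo pass expands the PostRA pseudo into the final instruction
+// sequence.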
 class AtomicCmpSwap<PatFrag Op, RegisterClass DRC> :
   PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap),
            [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>;
 
+class AtomicCmpSwapPostRA<RegisterClass RC> :
+  PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$cmp, RC:$swap), []> {
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+class AtomicCmpSwapSubwordPostRA<RegisterClass RC> :
+  PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$mask, RC:$ShiftCmpVal,
+                                RC:$mask2, RC:$ShiftNewVal, RC:$ShiftAmt), []> {
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+
 class LLBase<string opstr, RegisterOperand RO, DAGOperand MO = mem> :
   InstSE<(outs RO:$rt), (ins MO:$addr), !strconcat(opstr, "\t$rt, $addr"),
          [], II_LL, FrmI, opstr> {
@@ -1942,8 +1968,36 @@ let usesCustomInserter = 1 in {
   def ATOMIC_CMP_SWAP_I8   : AtomicCmpSwap<atomic_cmp_swap_8, GPR32>;
   def ATOMIC_CMP_SWAP_I16  : AtomicCmpSwap<atomic_cmp_swap_16, GPR32>;
   def ATOMIC_CMP_SWAP_I32  : AtomicCmpSwap<atomic_cmp_swap_32, GPR32>;
+
 }
 
+def ATOMIC_LOAD_ADD_I8_POSTRA   : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_ADD_I16_POSTRA  : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_ADD_I32_POSTRA  : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_SUB_I8_POSTRA   : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_SUB_I16_POSTRA  : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_SUB_I32_POSTRA  : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_AND_I8_POSTRA   : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_AND_I16_POSTRA  : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_AND_I32_POSTRA  : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_OR_I8_POSTRA    : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_OR_I16_POSTRA   : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_OR_I32_POSTRA   : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_XOR_I8_POSTRA   : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_XOR_I16_POSTRA  : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_XOR_I32_POSTRA  : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_NAND_I8_POSTRA  : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_NAND_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_NAND_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+
+def ATOMIC_SWAP_I8_POSTRA  : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_SWAP_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_SWAP_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+
+def ATOMIC_CMP_SWAP_I8_POSTRA : AtomicCmpSwapSubwordPostRA<GPR32>;
+def ATOMIC_CMP_SWAP_I16_POSTRA : AtomicCmpSwapSubwordPostRA<GPR32>;
+def ATOMIC_CMP_SWAP_I32_POSTRA : AtomicCmpSwapPostRA<GPR32>;
+
 /// Pseudo instructions for loading and storing accumulator registers.
 let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
   def LOAD_ACC64  : Load<"", ACC64>;
 
@@ -240,6 +240,7 @@ class MipsPassConfig : public TargetPassConfig {
   bool addInstSelector() override;
   void addPreEmitPass() override;
   void addPreRegAlloc() override;
+  void addPreEmit2() ;
   bool addIRTranslator() override;
   bool addLegalizeMachineIR() override;
   bool addRegBankSelect() override;
@@ -285,10 +286,18 @@ MipsTargetMachine::getTargetTransformInfo(const Function &F) {
   return TargetTransformInfo(BasicTTIImpl(this, F));
 }
 
+void MipsPassConfig::addPreEmit2() { // NOTE(review): empty body, declared without `override`, no caller visible in this diff - confirm it is needed.
+}
+
 // Implemented by targets that want to run passes immediately before
 // machine code is emitted. return true if -print-machineinstrs should
 // print out the code after the passes.
 void MipsPassConfig::addPreEmitPass() {
+  // Expand pseudo instructions that are sensitive to register allocation.
+  addPass(createMipsExpandPseudoPass());
+
+  // The microMIPS size reduction pass performs instruction reselection for
+  // instructions which can be remapped to a 16 bit instruction.
   addPass(createMicroMipsSizeReducePass());
 
   // The delay slot filler pass can potentially create forbidden slot hazards
 
@@ -1,17 +1,113 @@
-; RUN: llc -O0 -march=mipsel -mcpu=mips32r2 -target-abi=o32 < %s -filetype=asm -o - \
-; RUN:   | FileCheck -check-prefixes=PTR32,ALL %s
-; RUN: llc -O0 -march=mips64el -mcpu=mips64r2 -target-abi=n32 < %s -filetype=asm -o - \
-; RUN:   | FileCheck  -check-prefixes=PTR32,ALL %s
-; RUN: llc -O0 -march=mips64el -mcpu=mips64r2 -target-abi=n64 < %s -filetype=asm -o - \
-; RUN:   | FileCheck -check-prefixes=PTR64,ALL %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r2 -target-abi=o32 < %s -filetype=asm -o - \
+; RUN:   | FileCheck -check-prefixes=O32 %s
+; RUN: llc -O0 -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64r2 -target-abi=n32 < %s -filetype=asm -o - \
+; RUN:   | FileCheck -check-prefixes=N32 %s
+; RUN: llc -O0 -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64r2 -target-abi=n64 < %s -filetype=asm -o - \
+; RUN:   | FileCheck -check-prefixes=N64 %s
 
-; PTR32: lw $[[R0:[0-9]+]]
-; PTR64: ld $[[R0:[0-9]+]]
+@sym = external global i32 *
 
-; ALL: ll ${{[0-9]+}}, 0($[[R0]])
-
-define {i16, i1} @foo(i16* %addr, i16 signext %r, i16 zeroext %new) {
-  %res = cmpxchg i16* %addr, i16 %r, i16 %new seq_cst seq_cst
-  ret {i16, i1} %res
+define void @foo(i32 %new, i32 %old) {
+; O32-LABEL: foo:
+; O32:       # %bb.0: # %entry
+; O32-NEXT:    addiu $sp, $sp, -16
+; O32-NEXT:    .cfi_def_cfa_offset 16
+; O32-NEXT:    move $1, $5
+; O32-NEXT:    move $2, $4
+; O32-NEXT:    lui $3, %hi(sym)
+; O32-NEXT:    lw $3, %lo(sym)($3)
+; O32-NEXT:    sync
+; O32-NEXT:    lw $6, 12($sp) # 4-byte Folded Reload
+; O32-NEXT:  $BB0_1: # %entry
+; O32-NEXT:    # =>This Inner Loop Header: Depth=1
+; O32-NEXT:    ll $7, 0($3)
+; O32-NEXT:    bne $7, $4, $BB0_3
+; O32-NEXT:    nop
+; O32-NEXT:  # %bb.2: # %entry
+; O32-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; O32-NEXT:    move $8, $5
+; O32-NEXT:    sc $8, 0($3)
+; O32-NEXT:    beqz $8, $BB0_1
+; O32-NEXT:    nop
+; O32-NEXT:  $BB0_3: # %entry
+; O32-NEXT:    sync
+; O32-NEXT:    sw $7, 12($sp) # 4-byte Folded Spill
+; O32-NEXT:    sw $6, 8($sp) # 4-byte Folded Spill
+; O32-NEXT:    sw $1, 4($sp) # 4-byte Folded Spill
+; O32-NEXT:    sw $2, 0($sp) # 4-byte Folded Spill
+; O32-NEXT:    addiu $sp, $sp, 16
+; O32-NEXT:    jr $ra
+; O32-NEXT:    nop
+;
+; N32-LABEL: foo:
+; N32:       # %bb.0: # %entry
+; N32-NEXT:    addiu $sp, $sp, -16
+; N32-NEXT:    .cfi_def_cfa_offset 16
+; N32-NEXT:    move $1, $5
+; N32-NEXT:    sll $1, $1, 0
+; N32-NEXT:    move $2, $4
+; N32-NEXT:    sll $2, $2, 0
+; N32-NEXT:    lui $3, %hi(sym)
+; N32-NEXT:    lw $3, %lo(sym)($3)
+; N32-NEXT:    sync
+; N32-NEXT:    lw $6, 12($sp) # 4-byte Folded Reload
+; N32-NEXT:  .LBB0_1: # %entry
+; N32-NEXT:    # =>This Inner Loop Header: Depth=1
+; N32-NEXT:    ll $7, 0($3)
+; N32-NEXT:    bne $7, $2, .LBB0_3
+; N32-NEXT:    nop
+; N32-NEXT:  # %bb.2: # %entry
+; N32-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; N32-NEXT:    move $8, $1
+; N32-NEXT:    sc $8, 0($3)
+; N32-NEXT:    beqz $8, .LBB0_1
+; N32-NEXT:    nop
+; N32-NEXT:  .LBB0_3: # %entry
+; N32-NEXT:    sync
+; N32-NEXT:    sw $7, 12($sp) # 4-byte Folded Spill
+; N32-NEXT:    sw $6, 8($sp) # 4-byte Folded Spill
+; N32-NEXT:    addiu $sp, $sp, 16
+; N32-NEXT:    jr $ra
+; N32-NEXT:    nop
+;
+; N64-LABEL: foo:
+; N64:       # %bb.0: # %entry
+; N64-NEXT:    daddiu $sp, $sp, -16
+; N64-NEXT:    .cfi_def_cfa_offset 16
+; N64-NEXT:    move $1, $5
+; N64-NEXT:    sll $1, $1, 0
+; N64-NEXT:    move $2, $4
+; N64-NEXT:    sll $2, $2, 0
+; N64-NEXT:    lui $4, %highest(sym)
+; N64-NEXT:    daddiu $4, $4, %higher(sym)
+; N64-NEXT:    dsll $4, $4, 16
+; N64-NEXT:    daddiu $4, $4, %hi(sym)
+; N64-NEXT:    dsll $4, $4, 16
+; N64-NEXT:    ld $4, %lo(sym)($4)
+; N64-NEXT:    sync
+; N64-NEXT:    lw $3, 12($sp) # 4-byte Folded Reload
+; N64-NEXT:  .LBB0_1: # %entry
+; N64-NEXT:    # =>This Inner Loop Header: Depth=1
+; N64-NEXT:    ll $6, 0($4)
+; N64-NEXT:    bne $6, $2, .LBB0_3
+; N64-NEXT:    nop
+; N64-NEXT:  # %bb.2: # %entry
+; N64-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; N64-NEXT:    move $7, $1
+; N64-NEXT:    sc $7, 0($4)
+; N64-NEXT:    beqz $7, .LBB0_1
+; N64-NEXT:    nop
+; N64-NEXT:  .LBB0_3: # %entry
+; N64-NEXT:    sync
+; N64-NEXT:    sw $6, 12($sp) # 4-byte Folded Spill
+; N64-NEXT:    sw $3, 8($sp) # 4-byte Folded Spill
+; N64-NEXT:    daddiu $sp, $sp, 16
+; N64-NEXT:    jr $ra
+; N64-NEXT:    nop
+entry:
+  %0 = load i32 *, i32 ** @sym
+  cmpxchg i32 * %0, i32 %new, i32 %old seq_cst seq_cst
+  ret void
 }
 
@@ -1,18 +1,25 @@
-; RUN: llc %s -march=mipsel -mcpu=mips32r2 -mattr=micromips -filetype=asm \
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r2 -mattr=micromips -filetype=asm \
 ; RUN: -relocation-model=pic -o - | FileCheck %s
 
 @x = common global i32 0, align 4
 
 define i32 @AtomicLoadAdd32(i32 %incr) nounwind {
+; CHECK-LABEL: AtomicLoadAdd32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui $2, %hi(_gp_disp)
+; CHECK-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; CHECK-NEXT:    addu $2, $2, $25
+; CHECK-NEXT:    lw $1, %got(x)($2)
+; CHECK-NEXT:  $BB0_1: # %entry
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ll $2, 0($1)
+; CHECK-NEXT:    addu16 $3, $2, $4
+; CHECK-NEXT:    sc $3, 0($1)
+; CHECK-NEXT:    beqzc $3, $BB0_1
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    jrc $ra
 entry:
   %0 = atomicrmw add i32* @x, i32 %incr monotonic
   ret i32 %0
-
-; CHECK-LABEL:   AtomicLoadAdd32:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK:   ll      $[[R1:[0-9]+]], 0($[[R0]])
-; CHECK:   addu    $[[R2:[0-9]+]], $[[R1]], $4
-; CHECK:   sc      $[[R2]], 0($[[R0]])
-; CHECK:   beqzc   $[[R2]], $[[BB0]]
 }