Skip to content

Commit 3239ba8

Browse files
author
Aleksandar Beserminji
committed Jul 5, 2018
[mips] Fix atomic operations at O0, v3
Similar to PR/25526, fast-regalloc introduces spills at the end of basic blocks. When this occurs in between an ll and sc, the stores can cause the atomic sequence to fail. This patch fixes the issue by introducing more pseudos to represent atomic operations and moving their lowering to after the expansion of postRA pseudos. This version addresses issues with the initial implementation and covers all atomic operations. This resolves PR/32020. Thanks to James Cowgill for reporting the issue! Patch By: Simon Dardis Differential Revision: https://reviews.llvm.org/D31287 llvm-svn: 336328
1 parent b41c61e commit 3239ba8

12 files changed

+10095
-765
lines changed
 

‎llvm/lib/Target/Mips/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ add_llvm_target(MipsCodeGen
3030
MipsCCState.cpp
3131
MipsConstantIslandPass.cpp
3232
MipsDelaySlotFiller.cpp
33+
MipsExpandPseudo.cpp
3334
MipsFastISel.cpp
3435
MipsInstrInfo.cpp
3536
MipsInstructionSelector.cpp

‎llvm/lib/Target/Mips/Mips.h

+1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ namespace llvm {
3737
FunctionPass *createMipsBranchExpansion();
3838
FunctionPass *createMipsConstantIslandPass();
3939
FunctionPass *createMicroMipsSizeReducePass();
40+
FunctionPass *createMipsExpandPseudoPass();
4041

4142
InstructionSelector *createMipsInstructionSelector(const MipsTargetMachine &,
4243
MipsSubtarget &,

‎llvm/lib/Target/Mips/Mips64InstrInfo.td

+11
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,17 @@ let usesCustomInserter = 1 in {
8585
def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<atomic_cmp_swap_64, GPR64>;
8686
}
8787

88+
def ATOMIC_LOAD_ADD_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
89+
def ATOMIC_LOAD_SUB_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
90+
def ATOMIC_LOAD_AND_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
91+
def ATOMIC_LOAD_OR_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
92+
def ATOMIC_LOAD_XOR_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
93+
def ATOMIC_LOAD_NAND_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
94+
95+
def ATOMIC_SWAP_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
96+
97+
def ATOMIC_CMP_SWAP_I64_POSTRA : AtomicCmpSwapPostRA<GPR64>;
98+
8899
/// Pseudo instructions for loading and storing accumulator registers.
89100
let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
90101
def LOAD_ACC128 : Load<"", ACC128>;

‎llvm/lib/Target/Mips/MipsExpandPseudo.cpp

+702
Large diffs are not rendered by default.

‎llvm/lib/Target/Mips/MipsISelLowering.cpp

+276-329
Large diffs are not rendered by default.

‎llvm/lib/Target/Mips/MipsISelLowering.h

+4-8
Original file line numberDiff line numberDiff line change
@@ -679,17 +679,13 @@ class TargetRegisterClass;
679679
unsigned Size, unsigned DstReg,
680680
unsigned SrcRec) const;
681681

682-
MachineBasicBlock *emitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
683-
unsigned Size, unsigned BinOpcode,
684-
bool Nand = false) const;
682+
MachineBasicBlock *emitAtomicBinary(MachineInstr &MI,
683+
MachineBasicBlock *BB) const;
685684
MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr &MI,
686685
MachineBasicBlock *BB,
687-
unsigned Size,
688-
unsigned BinOpcode,
689-
bool Nand = false) const;
686+
unsigned Size) const;
690687
MachineBasicBlock *emitAtomicCmpSwap(MachineInstr &MI,
691-
MachineBasicBlock *BB,
692-
unsigned Size) const;
688+
MachineBasicBlock *BB) const;
693689
MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr &MI,
694690
MachineBasicBlock *BB,
695691
unsigned Size) const;

‎llvm/lib/Target/Mips/MipsInstrInfo.td

+54
Original file line numberDiff line numberDiff line change
@@ -1852,11 +1852,37 @@ class Atomic2Ops<PatFrag Op, RegisterClass DRC> :
18521852
PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$incr),
18531853
[(set DRC:$dst, (Op iPTR:$ptr, DRC:$incr))]>;
18541854

1855+
class Atomic2OpsPostRA<RegisterClass RC> :
1856+
PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr), []> {
1857+
let mayLoad = 1;
1858+
let mayStore = 1;
1859+
}
1860+
1861+
class Atomic2OpsSubwordPostRA<RegisterClass RC> :
1862+
PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr, RC:$mask, RC:$mask2,
1863+
RC:$shiftamnt), []>;
1864+
18551865
// Atomic Compare & Swap.
1866+
// Atomic compare and swap is lowered into two stages. The first stage happens
1867+
// during ISelLowering, which produces the PostRA version of this instruction.
18561868
class AtomicCmpSwap<PatFrag Op, RegisterClass DRC> :
18571869
PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap),
18581870
[(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>;
18591871

1872+
class AtomicCmpSwapPostRA<RegisterClass RC> :
1873+
PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$cmp, RC:$swap), []> {
1874+
let mayLoad = 1;
1875+
let mayStore = 1;
1876+
}
1877+
1878+
class AtomicCmpSwapSubwordPostRA<RegisterClass RC> :
1879+
PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$mask, RC:$ShiftCmpVal,
1880+
RC:$mask2, RC:$ShiftNewVal, RC:$ShiftAmt), []> {
1881+
let mayLoad = 1;
1882+
let mayStore = 1;
1883+
}
1884+
1885+
18601886
class LLBase<string opstr, RegisterOperand RO, DAGOperand MO = mem> :
18611887
InstSE<(outs RO:$rt), (ins MO:$addr), !strconcat(opstr, "\t$rt, $addr"),
18621888
[], II_LL, FrmI, opstr> {
@@ -1942,8 +1968,36 @@ let usesCustomInserter = 1 in {
19421968
def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<atomic_cmp_swap_8, GPR32>;
19431969
def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<atomic_cmp_swap_16, GPR32>;
19441970
def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, GPR32>;
1971+
19451972
}
19461973

1974+
def ATOMIC_LOAD_ADD_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
1975+
def ATOMIC_LOAD_ADD_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
1976+
def ATOMIC_LOAD_ADD_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
1977+
def ATOMIC_LOAD_SUB_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
1978+
def ATOMIC_LOAD_SUB_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
1979+
def ATOMIC_LOAD_SUB_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
1980+
def ATOMIC_LOAD_AND_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
1981+
def ATOMIC_LOAD_AND_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
1982+
def ATOMIC_LOAD_AND_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
1983+
def ATOMIC_LOAD_OR_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
1984+
def ATOMIC_LOAD_OR_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
1985+
def ATOMIC_LOAD_OR_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
1986+
def ATOMIC_LOAD_XOR_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
1987+
def ATOMIC_LOAD_XOR_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
1988+
def ATOMIC_LOAD_XOR_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
1989+
def ATOMIC_LOAD_NAND_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
1990+
def ATOMIC_LOAD_NAND_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
1991+
def ATOMIC_LOAD_NAND_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
1992+
1993+
def ATOMIC_SWAP_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
1994+
def ATOMIC_SWAP_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
1995+
def ATOMIC_SWAP_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
1996+
1997+
def ATOMIC_CMP_SWAP_I8_POSTRA : AtomicCmpSwapSubwordPostRA<GPR32>;
1998+
def ATOMIC_CMP_SWAP_I16_POSTRA : AtomicCmpSwapSubwordPostRA<GPR32>;
1999+
def ATOMIC_CMP_SWAP_I32_POSTRA : AtomicCmpSwapPostRA<GPR32>;
2000+
19472001
/// Pseudo instructions for loading and storing accumulator registers.
19482002
let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
19492003
def LOAD_ACC64 : Load<"", ACC64>;

‎llvm/lib/Target/Mips/MipsTargetMachine.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ class MipsPassConfig : public TargetPassConfig {
240240
bool addInstSelector() override;
241241
void addPreEmitPass() override;
242242
void addPreRegAlloc() override;
243+
void addPreEmit2() ;
243244
bool addIRTranslator() override;
244245
bool addLegalizeMachineIR() override;
245246
bool addRegBankSelect() override;
@@ -285,10 +286,18 @@ MipsTargetMachine::getTargetTransformInfo(const Function &F) {
285286
return TargetTransformInfo(BasicTTIImpl(this, F));
286287
}
287288

289+
void MipsPassConfig::addPreEmit2() {
290+
}
291+
288292
// Implemented by targets that want to run passes immediately before
289293
// machine code is emitted. return true if -print-machineinstrs should
290294
// print out the code after the passes.
291295
void MipsPassConfig::addPreEmitPass() {
296+
// Expand pseudo instructions that are sensitive to register allocation.
297+
addPass(createMipsExpandPseudoPass());
298+
299+
// The microMIPS size reduction pass performs instruction reselection for
300+
// instructions which can be remapped to a 16 bit instruction.
292301
addPass(createMicroMipsSizeReducePass());
293302

294303
// The delay slot filler pass can potentially create forbidden slot hazards

‎llvm/test/CodeGen/Mips/atomic.ll

+7,515-406
Large diffs are not rendered by default.

‎llvm/test/CodeGen/Mips/atomic64.ll

+1,397
Large diffs are not rendered by default.
+109-13
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,113 @@
1-
; RUN: llc -O0 -march=mipsel -mcpu=mips32r2 -target-abi=o32 < %s -filetype=asm -o - \
2-
; RUN: | FileCheck -check-prefixes=PTR32,ALL %s
3-
; RUN: llc -O0 -march=mips64el -mcpu=mips64r2 -target-abi=n32 < %s -filetype=asm -o - \
4-
; RUN: | FileCheck -check-prefixes=PTR32,ALL %s
5-
; RUN: llc -O0 -march=mips64el -mcpu=mips64r2 -target-abi=n64 < %s -filetype=asm -o - \
6-
; RUN: | FileCheck -check-prefixes=PTR64,ALL %s
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -O0 -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r2 -target-abi=o32 < %s -filetype=asm -o - \
3+
; RUN: | FileCheck -check-prefixes=O32 %s
4+
; RUN: llc -O0 -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64r2 -target-abi=n32 < %s -filetype=asm -o - \
5+
; RUN: | FileCheck -check-prefixes=N32,ALL %s
6+
; RUN: llc -O0 -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64r2 -target-abi=n64 < %s -filetype=asm -o - \
7+
; RUN: | FileCheck -check-prefixes=N64 %s
78

8-
; PTR32: lw $[[R0:[0-9]+]]
9-
; PTR64: ld $[[R0:[0-9]+]]
9+
@sym = external global i32 *
1010

11-
; ALL: ll ${{[0-9]+}}, 0($[[R0]])
12-
13-
define {i16, i1} @foo(i16* %addr, i16 signext %r, i16 zeroext %new) {
14-
%res = cmpxchg i16* %addr, i16 %r, i16 %new seq_cst seq_cst
15-
ret {i16, i1} %res
11+
define void @foo(i32 %new, i32 %old) {
12+
; O32-LABEL: foo:
13+
; O32: # %bb.0: # %entry
14+
; O32-NEXT: addiu $sp, $sp, -16
15+
; O32-NEXT: .cfi_def_cfa_offset 16
16+
; O32-NEXT: move $1, $5
17+
; O32-NEXT: move $2, $4
18+
; O32-NEXT: lui $3, %hi(sym)
19+
; O32-NEXT: lw $3, %lo(sym)($3)
20+
; O32-NEXT: sync
21+
; O32-NEXT: lw $6, 12($sp) # 4-byte Folded Reload
22+
; O32-NEXT: $BB0_1: # %entry
23+
; O32-NEXT: # =>This Inner Loop Header: Depth=1
24+
; O32-NEXT: ll $7, 0($3)
25+
; O32-NEXT: bne $7, $4, $BB0_3
26+
; O32-NEXT: nop
27+
; O32-NEXT: # %bb.2: # %entry
28+
; O32-NEXT: # in Loop: Header=BB0_1 Depth=1
29+
; O32-NEXT: move $8, $5
30+
; O32-NEXT: sc $8, 0($3)
31+
; O32-NEXT: beqz $8, $BB0_1
32+
; O32-NEXT: nop
33+
; O32-NEXT: $BB0_3: # %entry
34+
; O32-NEXT: sync
35+
; O32-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
36+
; O32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill
37+
; O32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
38+
; O32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
39+
; O32-NEXT: addiu $sp, $sp, 16
40+
; O32-NEXT: jr $ra
41+
; O32-NEXT: nop
42+
;
43+
; N32-LABEL: foo:
44+
; N32: # %bb.0: # %entry
45+
; N32-NEXT: addiu $sp, $sp, -16
46+
; N32-NEXT: .cfi_def_cfa_offset 16
47+
; N32-NEXT: move $1, $5
48+
; N32-NEXT: sll $1, $1, 0
49+
; N32-NEXT: move $2, $4
50+
; N32-NEXT: sll $2, $2, 0
51+
; N32-NEXT: lui $3, %hi(sym)
52+
; N32-NEXT: lw $3, %lo(sym)($3)
53+
; N32-NEXT: sync
54+
; N32-NEXT: lw $6, 12($sp) # 4-byte Folded Reload
55+
; N32-NEXT: .LBB0_1: # %entry
56+
; N32-NEXT: # =>This Inner Loop Header: Depth=1
57+
; N32-NEXT: ll $7, 0($3)
58+
; N32-NEXT: bne $7, $2, .LBB0_3
59+
; N32-NEXT: nop
60+
; N32-NEXT: # %bb.2: # %entry
61+
; N32-NEXT: # in Loop: Header=BB0_1 Depth=1
62+
; N32-NEXT: move $8, $1
63+
; N32-NEXT: sc $8, 0($3)
64+
; N32-NEXT: beqz $8, .LBB0_1
65+
; N32-NEXT: nop
66+
; N32-NEXT: .LBB0_3: # %entry
67+
; N32-NEXT: sync
68+
; N32-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
69+
; N32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill
70+
; N32-NEXT: addiu $sp, $sp, 16
71+
; N32-NEXT: jr $ra
72+
; N32-NEXT: nop
73+
;
74+
; N64-LABEL: foo:
75+
; N64: # %bb.0: # %entry
76+
; N64-NEXT: daddiu $sp, $sp, -16
77+
; N64-NEXT: .cfi_def_cfa_offset 16
78+
; N64-NEXT: move $1, $5
79+
; N64-NEXT: sll $1, $1, 0
80+
; N64-NEXT: move $2, $4
81+
; N64-NEXT: sll $2, $2, 0
82+
; N64-NEXT: lui $4, %highest(sym)
83+
; N64-NEXT: daddiu $4, $4, %higher(sym)
84+
; N64-NEXT: dsll $4, $4, 16
85+
; N64-NEXT: daddiu $4, $4, %hi(sym)
86+
; N64-NEXT: dsll $4, $4, 16
87+
; N64-NEXT: ld $4, %lo(sym)($4)
88+
; N64-NEXT: sync
89+
; N64-NEXT: lw $3, 12($sp) # 4-byte Folded Reload
90+
; N64-NEXT: .LBB0_1: # %entry
91+
; N64-NEXT: # =>This Inner Loop Header: Depth=1
92+
; N64-NEXT: ll $6, 0($4)
93+
; N64-NEXT: bne $6, $2, .LBB0_3
94+
; N64-NEXT: nop
95+
; N64-NEXT: # %bb.2: # %entry
96+
; N64-NEXT: # in Loop: Header=BB0_1 Depth=1
97+
; N64-NEXT: move $7, $1
98+
; N64-NEXT: sc $7, 0($4)
99+
; N64-NEXT: beqz $7, .LBB0_1
100+
; N64-NEXT: nop
101+
; N64-NEXT: .LBB0_3: # %entry
102+
; N64-NEXT: sync
103+
; N64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
104+
; N64-NEXT: sw $3, 8($sp) # 4-byte Folded Spill
105+
; N64-NEXT: daddiu $sp, $sp, 16
106+
; N64-NEXT: jr $ra
107+
; N64-NEXT: nop
108+
entry:
109+
%0 = load i32 *, i32 ** @sym
110+
cmpxchg i32 * %0, i32 %new, i32 %old seq_cst seq_cst
111+
ret void
16112
}
17113

+16-9
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,25 @@
1-
; RUN: llc %s -march=mipsel -mcpu=mips32r2 -mattr=micromips -filetype=asm \
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc %s -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r2 -mattr=micromips -filetype=asm \
23
; RUN: -relocation-model=pic -o - | FileCheck %s
34

45
@x = common global i32 0, align 4
56

67
define i32 @AtomicLoadAdd32(i32 %incr) nounwind {
8+
; CHECK-LABEL: AtomicLoadAdd32:
9+
; CHECK: # %bb.0: # %entry
10+
; CHECK-NEXT: lui $2, %hi(_gp_disp)
11+
; CHECK-NEXT: addiu $2, $2, %lo(_gp_disp)
12+
; CHECK-NEXT: addu $2, $2, $25
13+
; CHECK-NEXT: lw $1, %got(x)($2)
14+
; CHECK-NEXT: $BB0_1: # %entry
15+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
16+
; CHECK-NEXT: ll $2, 0($1)
17+
; CHECK-NEXT: addu16 $3, $2, $4
18+
; CHECK-NEXT: sc $3, 0($1)
19+
; CHECK-NEXT: beqzc $3, $BB0_1
20+
; CHECK-NEXT: # %bb.2: # %entry
21+
; CHECK-NEXT: jrc $ra
722
entry:
823
%0 = atomicrmw add i32* @x, i32 %incr monotonic
924
ret i32 %0
10-
11-
; CHECK-LABEL: AtomicLoadAdd32:
12-
; CHECK: lw $[[R0:[0-9]+]], %got(x)
13-
; CHECK: $[[BB0:[A-Z_0-9]+]]:
14-
; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]])
15-
; CHECK: addu $[[R2:[0-9]+]], $[[R1]], $4
16-
; CHECK: sc $[[R2]], 0($[[R0]])
17-
; CHECK: beqzc $[[R2]], $[[BB0]]
1825
}

0 commit comments

Comments
 (0)
Please sign in to comment.