Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -22236,6 +22236,25 @@
 
 bool AArch64TargetLowering::shouldInsertFencesForAtomic(
     const Instruction *I) const {
+  // Store-Release instructions only provide seq_cst guarantees when paired with
+  // Load-Acquire instructions. MSVC CRT does not use these instructions to
+  // implement seq_cst loads and stores, so we need fences.
+  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
+    if (auto *RMWI = dyn_cast<AtomicRMWInst>(I)) {
+      if (RMWI->getOrdering() == AtomicOrdering::SequentiallyConsistent)
+        return true;
+    } else if (auto *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
+      // Only the success ordering is inspected for cmpxchg.
+      if (CASI->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
+        return true;
+    } else if (auto *SI = dyn_cast<StoreInst>(I)) {
+      if (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent)
+        return true;
+    }
+  }
+
+  // Anything not handled above (including non-seq_cst operations on MSVC
+  // targets) is decided by the LDP/STP suitability check.
   return isOpSuitableForLDPSTP(I);
 }
 
Index: llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
===================================================================
--- llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
+++ llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
@@ -9,16 +9,18 @@
 define dso_local i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    adrp x9, var8
 ; CHECK-NEXT:    add x9, x9, :lo12:var8
 ; CHECK-NEXT:  .LBB0_1: // %atomicrmw.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrb w8, [x9]
+; CHECK-NEXT:    ldxrb w8, [x9]
 ; CHECK-NEXT:    add w10, w8, w0
-; CHECK-NEXT:    stlxrb w11, w10, [x9]
+; CHECK-NEXT:    stxrb w11, w10, [x9]
 ; CHECK-NEXT:    cbnz w11, .LBB0_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    ret
   %old = atomicrmw add ptr @var8, i8 %offset seq_cst
   ret i8 %old
@@ -135,16 +137,18 @@
 define dso_local i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    adrp x9, var64
 ; CHECK-NEXT:    add x9, x9, :lo12:var64
 ; CHECK-NEXT:  .LBB7_1: // %atomicrmw.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr x8, [x9]
+; CHECK-NEXT:    ldxr x8, [x9]
 ; CHECK-NEXT:    sub x10, x8, x0
-; CHECK-NEXT:    stlxr w11, x10, [x9]
+; CHECK-NEXT:    stxr w11, x10, [x9]
 ; CHECK-NEXT:    cbnz w11, .LBB7_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    ret
   %old = atomicrmw sub ptr @var64, i64 %offset seq_cst
   ret i64 %old
@@ -189,16 +193,18 @@
 define dso_local i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    adrp x9, var32
 ; CHECK-NEXT:    add x9, x9, :lo12:var32
 ; CHECK-NEXT:  .LBB10_1: // %atomicrmw.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
+; CHECK-NEXT:    ldxr w8, [x9]
 ; CHECK-NEXT:    and w10, w8, w0
-; CHECK-NEXT:    stlxr w11, w10, [x9]
+; CHECK-NEXT:    stxr w11, w10, [x9]
 ; CHECK-NEXT:    cbnz w11, .LBB10_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    ret
   %old = atomicrmw and ptr @var32, i32 %offset seq_cst
   ret i32 %old
@@ -225,16 +231,18 @@
 define dso_local i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    adrp x9, var8
 ; CHECK-NEXT:    add x9, x9, :lo12:var8
 ; CHECK-NEXT:  .LBB12_1: // %atomicrmw.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrb w8, [x9]
+; CHECK-NEXT:    ldxrb w8, [x9]
 ; CHECK-NEXT:    orr w10, w8, w0
-; CHECK-NEXT:    stlxrb w11, w10, [x9]
+; CHECK-NEXT:    stxrb w11, w10, [x9]
 ; CHECK-NEXT:    cbnz w11, .LBB12_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    ret
   %old = atomicrmw or ptr @var8, i8 %offset seq_cst
   ret i8 %old
@@ -333,16 +341,18 @@
 define dso_local i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    adrp x9, var32
 ; CHECK-NEXT:    add x9, x9, :lo12:var32
 ; CHECK-NEXT:  .LBB18_1: // %atomicrmw.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
+; CHECK-NEXT:    ldxr w8, [x9]
 ; CHECK-NEXT:    eor w10, w8, w0
-; CHECK-NEXT:    stlxr w11, w10, [x9]
+; CHECK-NEXT:    stxr w11, w10, [x9]
 ; CHECK-NEXT:    cbnz w11, .LBB18_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    ret
   %old = atomicrmw xor ptr @var32, i32 %offset seq_cst
   ret i32 %old
@@ -388,15 +398,17 @@
 ; CHECK-LABEL: test_atomic_load_xchg_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    adrp x9, var16
 ; CHECK-NEXT:    add x9, x9, :lo12:var16
 ; CHECK-NEXT:  .LBB21_1: // %atomicrmw.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrh w8, [x9]
-; CHECK-NEXT:    stlxrh w10, w0, [x9]
+; CHECK-NEXT:    ldxrh w8, [x9]
+; CHECK-NEXT:    stxrh w10, w0, [x9]
 ; CHECK-NEXT:    cbnz w10, .LBB21_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    ret
   %old = atomicrmw xchg ptr @var16, i16 %offset seq_cst
   ret i16 %old
@@ -500,17 +512,19 @@
 define dso_local i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    adrp x9, var64
 ; CHECK-NEXT:    add x9, x9, :lo12:var64
 ; CHECK-NEXT:  .LBB27_1: // %atomicrmw.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr x8, [x9]
+; CHECK-NEXT:    ldxr x8, [x9]
 ; CHECK-NEXT:    cmp x8, x0
 ; CHECK-NEXT:    csel x10, x8, x0, le
-; CHECK-NEXT:    stlxr w11, x10, [x9]
+; CHECK-NEXT:    stxr w11, x10, [x9]
 ; CHECK-NEXT:    cbnz w11, .LBB27_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    ret
   %old = atomicrmw min ptr @var64, i64 %offset seq_cst
   ret i64 %old
@@ -519,18 +533,20 @@
 define dso_local i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    adrp x9, var8
 ; CHECK-NEXT:    add x9, x9, :lo12:var8
 ; CHECK-NEXT:  .LBB28_1: // %atomicrmw.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrb w10, [x9]
+; CHECK-NEXT:    ldxrb w10, [x9]
 ; CHECK-NEXT:    sxtb w8, w10
 ; CHECK-NEXT:    cmp w8, w0, sxtb
 ; CHECK-NEXT:    csel w10, w10, w0, gt
-; CHECK-NEXT:    stlxrb w11, w10, [x9]
+; CHECK-NEXT:    stxrb w11, w10, [x9]
 ; CHECK-NEXT:    cbnz w11, .LBB28_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    ret
   %old = atomicrmw max ptr @var8, i8 %offset seq_cst
   ret i8 %old
@@ -637,17 +653,19 @@
 define dso_local i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    adrp x9, var32
 ; CHECK-NEXT:    add x9, x9, :lo12:var32
 ; CHECK-NEXT:  .LBB34_1: // %atomicrmw.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
+; CHECK-NEXT:    ldxr w8, [x9]
 ; CHECK-NEXT:    cmp w8, w0
 ; CHECK-NEXT:    csel w10, w8, w0, ls
-; CHECK-NEXT:    stlxr w11, w10, [x9]
+; CHECK-NEXT:    stxr w11, w10, [x9]
 ; CHECK-NEXT:    cbnz w11, .LBB34_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    ret
   %old = atomicrmw umin ptr @var32, i32 %offset seq_cst
   ret i32 %old
@@ -715,17 +733,19 @@
 define dso_local i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    adrp x9, var32
 ; CHECK-NEXT:    add x9, x9, :lo12:var32
 ; CHECK-NEXT:  .LBB38_1: // %atomicrmw.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
+; CHECK-NEXT:    ldxr w8, [x9]
 ; CHECK-NEXT:    cmp w8, w0
 ; CHECK-NEXT:    csel w10, w8, w0, hi
-; CHECK-NEXT:    stlxr w11, w10, [x9]
+; CHECK-NEXT:    stxr w11, w10, [x9]
 ; CHECK-NEXT:    cbnz w11, .LBB38_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    ret
   %old = atomicrmw umax ptr @var32, i32 %offset seq_cst
   ret i32 %old
@@ -780,26 +800,31 @@
 
 define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16:
-; CHECK:       // %bb.0:
+; CHECK:       // %bb.0: // %cmpxchg.start
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    adrp x9, var16
-; CHECK-NEXT:    add x9, x9, :lo12:var16
-; CHECK-NEXT:  .LBB41_1: // %cmpxchg.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrh w0, [x9]
-; CHECK-NEXT:    cmp w0, w8
+; CHECK-NEXT:    adrp x8, var16
+; CHECK-NEXT:    add x8, x8, :lo12:var16
+; CHECK-NEXT:    ldxrh w10, [x8]
+; CHECK-NEXT:    and w9, w0, #0xffff
+; CHECK-NEXT:    mov w0, w10
+; CHECK-NEXT:    cmp w10, w9
 ; CHECK-NEXT:    b.ne .LBB41_4
-; CHECK-NEXT:  // %bb.2: // %cmpxchg.trystore
-; CHECK-NEXT:    // in Loop: Header=BB41_1 Depth=1
-; CHECK-NEXT:    stlxrh w10, w1, [x9]
-; CHECK-NEXT:    cbnz w10, .LBB41_1
-; CHECK-NEXT:  // %bb.3: // %cmpxchg.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
+; CHECK-NEXT:  // %bb.1: // %cmpxchg.fencedstore
+; CHECK-NEXT:    dmb ish
+; CHECK-NEXT:  .LBB41_2: // %cmpxchg.trystore
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    stxrh w10, w1, [x8]
+; CHECK-NEXT:    cbz w10, .LBB41_5
+; CHECK-NEXT:  // %bb.3: // %cmpxchg.releasedload
+; CHECK-NEXT:    // in Loop: Header=BB41_2 Depth=1
+; CHECK-NEXT:    ldxrh w0, [x8]
+; CHECK-NEXT:    cmp w0, w9
+; CHECK-NEXT:    b.eq .LBB41_2
 ; CHECK-NEXT:  .LBB41_4: // %cmpxchg.nostore
 ; CHECK-NEXT:    clrex
+; CHECK-NEXT:  .LBB41_5: // %cmpxchg.end
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    ret
   %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new seq_cst seq_cst
   %old = extractvalue { i16, i1 } %pair, 0
@@ -970,8 +995,9 @@
 ; CHECK-LABEL: test_atomic_store_seq_cst_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, var8
-; CHECK-NEXT:    add x8, x8, :lo12:var8
-; CHECK-NEXT:    stlrb w0, [x8]
+; CHECK-NEXT:    dmb ish
+; CHECK-NEXT:    strb w0, [x8, :lo12:var8]
+; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    ret
   store atomic i8 %val, ptr @var8 seq_cst, align 1
   ret void