diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -102,6 +102,34 @@
               (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
           (LDURXi GPR64sp:$Rn, simm9:$offset)>;
 
+// FP 32-bit loads
+def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32>
+                 (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend))))),
+          (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
+def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32>
+                 (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend))))),
+          (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
+def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32>
+                 (am_indexed32 GPR64sp:$Rn, uimm12s8:$offset))))),
+          (LDRSui GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32>
+                 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
+          (LDURSi GPR64sp:$Rn, simm9:$offset)>;
+
+// FP 64-bit loads
+def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64>
+                 (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend))))),
+          (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
+def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64>
+                 (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend))))),
+          (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
+def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64>
+                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
+          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64>
+                 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))))),
+          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+
 //===----------------------------------
 // Atomic stores
 //===----------------------------------
@@ -196,6 +224,38 @@
               (am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val),
           (STURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>;
 
+// FP 32-bit stores
+def : Pat<(relaxed_store<atomic_store_32>
+             (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend),
+             (i32 (bitconvert (f32 FPR32Op:$val)))),
+          (STRSroW FPR32Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
+def : Pat<(relaxed_store<atomic_store_32>
+             (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend),
+             (i32 (bitconvert (f32 FPR32Op:$val)))),
+          (STRSroX FPR32Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
+def : Pat<(relaxed_store<atomic_store_32>
+             (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))),
+          (STRSui FPR32Op:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
+def : Pat<(relaxed_store<atomic_store_32>
+             (am_unscaled32 GPR64sp:$Rn, simm9:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))),
+          (STURSi FPR32Op:$val, GPR64sp:$Rn, simm9:$offset)>;
+
+// FP 64-bit stores
+def : Pat<(relaxed_store<atomic_store_64>
+             (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend),
+             (i64 (bitconvert (f64 FPR64Op:$val)))),
+          (STRDroW FPR64Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
+def : Pat<(relaxed_store<atomic_store_64>
+             (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend),
+             (i64 (bitconvert (f64 FPR64Op:$val)))),
+          (STRDroX FPR64Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
+def : Pat<(relaxed_store<atomic_store_64>
+             (am_indexed64 GPR64sp:$Rn, uimm12s4:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
+          (STRDui FPR64Op:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
+def : Pat<(relaxed_store<atomic_store_64>
+             (am_unscaled64 GPR64sp:$Rn, simm9:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
+          (STURDi FPR64Op:$val, GPR64sp:$Rn, simm9:$offset)>;
+
 //===----------------------------------
 // Low-level exclusive operations
 //===----------------------------------
diff --git a/llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll b/llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll
@@ -0,0 +1,94 @@
+; PR52927: Relaxed atomics can load to/store from fp regs directly
+; RUN: llc < %s -mtriple=arm64-eabi -asm-verbose=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s
+
+define float @atomic_load_relaxed_f32(float* %p, i32 %off32, i64 %off64) #0 {
+; CHECK-LABEL: atomic_load_relaxed_f32:
+  %ptr_unsigned = getelementptr float, float* %p, i32 4095
+  %val_unsigned = load atomic float, float* %ptr_unsigned monotonic, align 4
+; CHECK: ldr {{s[0-9]+}}, [x0, #16380]
+
+  %ptr_regoff = getelementptr float, float* %p, i32 %off32
+  %val_regoff = load atomic float, float* %ptr_regoff unordered, align 4
+  %tot1 = fadd float %val_unsigned, %val_regoff
+; CHECK: ldr {{s[0-9]+}}, [x0, w1, sxtw #2]
+
+  %ptr_regoff64 = getelementptr float, float* %p, i64 %off64
+  %val_regoff64 = load atomic float, float* %ptr_regoff64 monotonic, align 4
+  %tot2 = fadd float %tot1, %val_regoff64
+; CHECK: ldr {{s[0-9]+}}, [x0, x2, lsl #2]
+
+  %ptr_unscaled = getelementptr float, float* %p, i32 -64
+  %val_unscaled = load atomic float, float* %ptr_unscaled unordered, align 4
+  %tot3 = fadd float %tot2, %val_unscaled
+; CHECK: ldur {{s[0-9]+}}, [x0, #-256]
+
+  ret float %tot3
+}
+
+define double @atomic_load_relaxed_f64(double* %p, i32 %off32, i64 %off64) #0 {
+; CHECK-LABEL: atomic_load_relaxed_f64:
+  %ptr_unsigned = getelementptr double, double* %p, i32 4095
+  %val_unsigned = load atomic double, double* %ptr_unsigned monotonic, align 8
+; CHECK: ldr {{d[0-9]+}}, [x0, #32760]
+
+  %ptr_regoff = getelementptr double, double* %p, i32 %off32
+  %val_regoff = load atomic double, double* %ptr_regoff unordered, align 8
+  %tot1 = fadd double %val_unsigned, %val_regoff
+; CHECK: ldr {{d[0-9]+}}, [x0, w1, sxtw #3]
+
+  %ptr_regoff64 = getelementptr double, double* %p, i64 %off64
+  %val_regoff64 = load atomic double, double* %ptr_regoff64 monotonic, align 8
+  %tot2 = fadd double %tot1, %val_regoff64
+; CHECK: ldr {{d[0-9]+}}, [x0, x2, lsl #3]
+
+  %ptr_unscaled = getelementptr double, double* %p, i32 -32
+  %val_unscaled = load atomic double, double* %ptr_unscaled unordered, align 8
+  %tot3 = fadd double %tot2, %val_unscaled
+; CHECK: ldur {{d[0-9]+}}, [x0, #-256]
+
+  ret double %tot3
+}
+
+define void @atomic_store_relaxed_f32(float* %p, i32 %off32, i64 %off64, float %val) #0 {
+; CHECK-LABEL: atomic_store_relaxed_f32:
+  %ptr_unsigned = getelementptr float, float* %p, i32 4095
+  store atomic float %val, float* %ptr_unsigned monotonic, align 4
+; CHECK: str {{s[0-9]+}}, [x0, #16380]
+
+  %ptr_regoff = getelementptr float, float* %p, i32 %off32
+  store atomic float %val, float* %ptr_regoff unordered, align 4
+; CHECK: str {{s[0-9]+}}, [x0, w1, sxtw #2]
+
+  %ptr_regoff64 = getelementptr float, float* %p, i64 %off64
+  store atomic float %val, float* %ptr_regoff64 monotonic, align 4
+; CHECK: str {{s[0-9]+}}, [x0, x2, lsl #2]
+
+  %ptr_unscaled = getelementptr float, float* %p, i32 -64
+  store atomic float %val, float* %ptr_unscaled unordered, align 4
+; CHECK: stur {{s[0-9]+}}, [x0, #-256]
+
+  ret void
+}
+
+define void @atomic_store_relaxed_f64(double* %p, i32 %off32, i64 %off64, double %val) #0 {
+; CHECK-LABEL: atomic_store_relaxed_f64:
+  %ptr_unsigned = getelementptr double, double* %p, i32 4095
+  store atomic double %val, double* %ptr_unsigned monotonic, align 8
+; CHECK: str {{d[0-9]+}}, [x0, #32760]
+
+  %ptr_regoff = getelementptr double, double* %p, i32 %off32
+  store atomic double %val, double* %ptr_regoff unordered, align 8
+; CHECK: str {{d[0-9]+}}, [x0, w1, sxtw #3]
+
+  %ptr_regoff64 = getelementptr double, double* %p, i64 %off64
+  store atomic double %val, double* %ptr_regoff64 unordered, align 8
+; CHECK: str {{d[0-9]+}}, [x0, x2, lsl #3]
+
+  %ptr_unscaled = getelementptr double, double* %p, i32 -32
+  store atomic double %val, double* %ptr_unscaled monotonic, align 8
+; CHECK: stur {{d[0-9]+}}, [x0, #-256]
+
+  ret void
+}
+
+attributes #0 = { nounwind }
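
Background note (not part of the patch): the IR in the test is the kind a front end produces for relaxed atomic operations on FP types. A hypothetical user-level sketch in C++ follows; the function names are made up, and it assumes the front end emits an IR-level "load atomic float ... monotonic" / "store atomic float ... monotonic" for std::atomic<float> with memory_order_relaxed, as recent clang does. With the patterns above, such accesses should select an FP ldr/str directly instead of an integer load/store plus an fmov between register files.

    // Illustrative only; not taken from the patch or its tests.
    #include <atomic>

    float load_relaxed(const std::atomic<float> &a) {
      // Relaxed (monotonic) atomic load of a float: with this patch the
      // backend can emit something like "ldr s0, [x0]" rather than
      // "ldr w8, [x0]" followed by "fmov s0, w8".
      return a.load(std::memory_order_relaxed);
    }

    void store_relaxed(std::atomic<float> &a, float v) {
      // Likewise, the relaxed store can become "str s0, [x0]" instead of
      // "fmov w8, s0" followed by "str w8, [x0]".
      a.store(v, std::memory_order_relaxed);
    }

Acquire/release and seq_cst accesses are unaffected; the relaxed_load/relaxed_store PatFrags only match monotonic and unordered orderings, which is what the test exercises.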