Index: lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
===================================================================
--- lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -102,6 +102,10 @@
   case AArch64::LDADDALh:
   case AArch64::LDADDALs:
   case AArch64::LDADDALd:
+  case AArch64::LDCLRALb:
+  case AArch64::LDCLRALh:
+  case AArch64::LDCLRALs:
+  case AArch64::LDCLRALd:
   case AArch64::LDEORALb:
   case AArch64::LDEORALh:
   case AArch64::LDEORALs:
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10567,9 +10567,6 @@
   if (Size > 128) return AtomicExpansionKind::None;
   // Nand not supported in LSE.
   if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC;
-  // Currently leaving And and Sub to LLSC
-  if ((AI->getOperation() == AtomicRMWInst::And) || (AI->getOperation() == AtomicRMWInst::Sub))
-    return AtomicExpansionKind::LLSC;
   // Leave 128 bits to LLSC.
   return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC;
 }
Index: lib/Target/AArch64/AArch64InstrAtomics.td
===================================================================
--- lib/Target/AArch64/AArch64InstrAtomics.td
+++ lib/Target/AArch64/AArch64InstrAtomics.td
@@ -451,3 +451,13 @@
 def : Pat<(atomic_swap_16 GPR64:$Rn, GPR32:$Rs), (SWPALh GPR32:$Rs, GPR64sp:$Rn)>;
 def : Pat<(atomic_swap_32 GPR64:$Rn, GPR32:$Rs), (SWPALs GPR32:$Rs, GPR64sp:$Rn)>;
 def : Pat<(atomic_swap_64 GPR64:$Rn, GPR64:$Rs), (SWPALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_sub_8 GPR64:$Rn, GPR32:$Rs), (LDADDALb (SUBWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
+def : Pat<(atomic_load_sub_16 GPR64:$Rn, GPR32:$Rs), (LDADDALh (SUBWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
+def : Pat<(atomic_load_sub_32 GPR64:$Rn, GPR32:$Rs), (LDADDALs (SUBWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
+def : Pat<(atomic_load_sub_64 GPR64:$Rn, GPR64:$Rs), (LDADDALd (SUBXrr XZR, GPR64:$Rs), GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_and_8 GPR64:$Rn, GPR32:$Rs), (LDCLRALb (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
+def : Pat<(atomic_load_and_16 GPR64:$Rn, GPR32:$Rs), (LDCLRALh (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
+def : Pat<(atomic_load_and_32 GPR64:$Rn, GPR32:$Rs), (LDCLRALs (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
+def : Pat<(atomic_load_and_64 GPR64:$Rn, GPR64:$Rs), (LDCLRALd (ORNXrr XZR, GPR64:$Rs), GPR64sp:$Rn)>;
Index: test/CodeGen/AArch64/atomic-ops-lse.ll
===================================================================
--- test/CodeGen/AArch64/atomic-ops-lse.ll
+++ test/CodeGen/AArch64/atomic-ops-lse.ll
@@ -681,3 +681,164 @@
   ret i64 %old
 }
 
+define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i8:
+  %old = atomicrmw sub i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldaddalb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i16:
+  %old = atomicrmw sub i16* @var16, i16 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldaddalh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32:
+  %old = atomicrmw sub i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64:
+  %old = atomicrmw sub i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i64 %old
+}
+
+define void @test_atomic_load_sub_i32_noret(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32_noret:
+  atomicrmw sub i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret void
+}
+
+define void @test_atomic_load_sub_i64_noret(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64_noret:
+  atomicrmw sub i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret void
+}
+
+define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i8:
+  %old = atomicrmw and i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: ldclralb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i16:
+  %old = atomicrmw and i16* @var16, i16 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: ldclralh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i32:
+  %old = atomicrmw and i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i64:
+  %old = atomicrmw and i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret i64 %old
+}
+
+define void @test_atomic_load_and_i32_noret(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i32_noret:
+  atomicrmw and i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+
+; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
+
+define void @test_atomic_load_and_i64_noret(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i64_noret:
+  atomicrmw and i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]]
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+
+; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+  ret void
+}
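
Note on the lowering, for reviewers: LSE has no atomic subtract or AND
instruction, so the TableGen patterns above lean on two identities.
Subtraction is addition of the negated operand, hence SUBWrr/SUBXrr against
the zero register (printed as neg) feeding LDADDAL. LDCLR computes
old & NOT(Rs), a bit-clear, so AND becomes LDCLR of the complemented operand,
hence ORNWrr/ORNXrr against the zero register (printed as mvn) feeding
LDCLRAL. A minimal sketch of the expected codegen; the function name and
register numbers are illustrative, not fixed by the patch:

  define i32 @sub_and_example(i32* %p, i32 %x) {
    ; sub => neg + ldaddal:  old + (-x) == old - x
    ;   neg w8, w1
    ;   ldaddal w8, w9, [x0]
    %old1 = atomicrmw sub i32* %p, i32 %x seq_cst

    ; and => mvn + ldclral:  old & ~(~x) == old & x
    ;   mvn w8, w1
    ;   ldclral w8, w9, [x0]
    %old2 = atomicrmw and i32* %p, i32 %x seq_cst

    %sum = add i32 %old1, %old2
    ret i32 %sum
  }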