Index: llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h +++ llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h @@ -779,6 +779,7 @@ ATOMIC_LOAD_ADD, ATOMIC_LOAD_SUB, ATOMIC_LOAD_AND, + ATOMIC_LOAD_CLR, ATOMIC_LOAD_OR, ATOMIC_LOAD_XOR, ATOMIC_LOAD_NAND, Index: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h +++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1294,6 +1294,7 @@ N->getOpcode() == ISD::ATOMIC_LOAD_ADD || N->getOpcode() == ISD::ATOMIC_LOAD_SUB || N->getOpcode() == ISD::ATOMIC_LOAD_AND || + N->getOpcode() == ISD::ATOMIC_LOAD_CLR || N->getOpcode() == ISD::ATOMIC_LOAD_OR || N->getOpcode() == ISD::ATOMIC_LOAD_XOR || N->getOpcode() == ISD::ATOMIC_LOAD_NAND || @@ -1345,6 +1346,7 @@ N->getOpcode() == ISD::ATOMIC_LOAD_ADD || N->getOpcode() == ISD::ATOMIC_LOAD_SUB || N->getOpcode() == ISD::ATOMIC_LOAD_AND || + N->getOpcode() == ISD::ATOMIC_LOAD_CLR || N->getOpcode() == ISD::ATOMIC_LOAD_OR || N->getOpcode() == ISD::ATOMIC_LOAD_XOR || N->getOpcode() == ISD::ATOMIC_LOAD_NAND || Index: llvm/trunk/include/llvm/Target/TargetSelectionDAG.td =================================================================== --- llvm/trunk/include/llvm/Target/TargetSelectionDAG.td +++ llvm/trunk/include/llvm/Target/TargetSelectionDAG.td @@ -485,6 +485,8 @@ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_and : SDNode<"ISD::ATOMIC_LOAD_AND" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_clr : SDNode<"ISD::ATOMIC_LOAD_CLR" , SDTAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_or : SDNode<"ISD::ATOMIC_LOAD_OR" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_xor : SDNode<"ISD::ATOMIC_LOAD_XOR" , 
SDTAtomic2, @@ -1244,6 +1246,7 @@ defm atomic_swap : binary_atomic_op<atomic_swap>; defm atomic_load_sub : binary_atomic_op<atomic_load_sub>; defm atomic_load_and : binary_atomic_op<atomic_load_and>; +defm atomic_load_clr : binary_atomic_op<atomic_load_clr>; defm atomic_load_or : binary_atomic_op<atomic_load_or>; defm atomic_load_xor : binary_atomic_op<atomic_load_xor>; defm atomic_load_nand : binary_atomic_op<atomic_load_nand>; Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3971,6 +3971,7 @@ case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_NAND: Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -146,6 +146,7 @@ case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_NAND: @@ -1402,6 +1403,7 @@ case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_NAND: Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -493,6 +493,7 @@ case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_NAND: @@ -5755,6 +5756,7 @@ assert((Opcode == ISD::ATOMIC_LOAD_ADD || Opcode == 
ISD::ATOMIC_LOAD_SUB || Opcode == ISD::ATOMIC_LOAD_AND || + Opcode == ISD::ATOMIC_LOAD_CLR || Opcode == ISD::ATOMIC_LOAD_OR || Opcode == ISD::ATOMIC_LOAD_XOR || Opcode == ISD::ATOMIC_LOAD_NAND || Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -85,6 +85,7 @@ case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; + case ISD::ATOMIC_LOAD_CLR: return "AtomicLoadClr"; case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h @@ -597,6 +597,7 @@ SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const; SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector<SDNode *> *Created) const override; Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -466,6 +466,8 @@ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom); setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom); + 
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0. // This requires the Performance Monitors extension. @@ -2683,6 +2685,8 @@ return LowerVECREDUCE(Op, DAG); case ISD::ATOMIC_LOAD_SUB: return LowerATOMIC_LOAD_SUB(Op, DAG); + case ISD::ATOMIC_LOAD_AND: + return LowerATOMIC_LOAD_AND(Op, DAG); } } @@ -7306,6 +7310,23 @@ AN->getMemOperand()); } +SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op, + SelectionDAG &DAG) const { + auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget()); + if (!Subtarget.hasLSE()) + return SDValue(); + + // LSE has no load-and; use load-clear (LDCLR) with the inverted operand. + SDLoc dl(Op); + MVT VT = Op.getSimpleValueType(); + SDValue RHS = Op.getOperand(2); + AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode()); + RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS); + return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(), + Op.getOperand(0), Op.getOperand(1), RHS, + AN->getMemOperand()); +} + /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment /// specified in the intrinsic calls. 
Index: llvm/trunk/lib/Target/AArch64/AArch64InstrAtomics.td =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64InstrAtomics.td +++ llvm/trunk/lib/Target/AArch64/AArch64InstrAtomics.td @@ -409,13 +409,18 @@ defm : LDOPregister_patterns<"LDADD", "atomic_load_add">; defm : LDOPregister_patterns<"LDSET", "atomic_load_or">; defm : LDOPregister_patterns<"LDEOR", "atomic_load_xor">; + defm : LDOPregister_patterns<"LDCLR", "atomic_load_clr">; defm : LDOPregister_patterns<"LDSMAX", "atomic_load_max">; defm : LDOPregister_patterns<"LDSMIN", "atomic_load_min">; defm : LDOPregister_patterns<"LDUMAX", "atomic_load_umax">; defm : LDOPregister_patterns<"LDUMIN", "atomic_load_umin">; defm : LDOPregister_patterns<"SWP", "atomic_swap">; + defm : CASregister_patterns<"CAS", "atomic_cmp_swap">; + + // These two patterns are only needed for global isel, selection dag isel + // converts atomic load-sub into a sub and atomic load-add, and likewise for + // and -> clr. 
defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">; defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">; - defm : CASregister_patterns<"CAS", "atomic_cmp_swap">; } Index: llvm/trunk/test/CodeGen/AArch64/atomic-ops-lse.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/atomic-ops-lse.ll +++ llvm/trunk/test/CodeGen/AArch64/atomic-ops-lse.ll @@ -978,6 +978,102 @@ ret i64 %old } +define i8 @test_atomic_load_and_i8_inv_imm() nounwind { +; CHECK-LABEL: test_atomic_load_and_i8_inv_imm: + %old = atomicrmw and i8* @var8, i8 -2 seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 +; CHECK: orr w[[CONST:[0-9]+]], wzr, #0x1 +; CHECK: ldclralb w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + ret i8 %old +} + +define i16 @test_atomic_load_and_i16_inv_imm() nounwind { +; CHECK-LABEL: test_atomic_load_and_i16_inv_imm: + %old = atomicrmw and i16* @var16, i16 -2 seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 +; CHECK: orr w[[CONST:[0-9]+]], wzr, #0x1 +; CHECK: ldclralh w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + ret i16 %old +} + +define i32 @test_atomic_load_and_i32_inv_imm() nounwind { +; CHECK-LABEL: test_atomic_load_and_i32_inv_imm: + %old = atomicrmw and i32* @var32, i32 -2 seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 +; CHECK: orr w[[CONST:[0-9]+]], wzr, #0x1 +; CHECK: ldclral w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + ret i32 %old +} + +define i64 @test_atomic_load_and_i64_inv_imm() nounwind { +; CHECK-LABEL: test_atomic_load_and_i64_inv_imm: + %old = atomicrmw and i64* @var64, i64 -2 seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], 
{{#?}}:lo12:var64 +; CHECK: orr w[[CONST:[0-9]+]], wzr, #0x1 +; CHECK: ldclral x[[CONST]], x[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + ret i64 %old +} + +define i8 @test_atomic_load_and_i8_inv_arg(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_and_i8_inv_arg: + %inv = xor i8 %offset, -1 + %old = atomicrmw and i8* @var8, i8 %inv seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 +; CHECK: ldclralb w0, w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + ret i8 %old +} + +define i16 @test_atomic_load_and_i16_inv_arg(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_and_i16_inv_arg: + %inv = xor i16 %offset, -1 + %old = atomicrmw and i16* @var16, i16 %inv seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 +; CHECK: ldclralh w0, w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + ret i16 %old +} + +define i32 @test_atomic_load_and_i32_inv_arg(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_and_i32_inv_arg: + %inv = xor i32 %offset, -1 + %old = atomicrmw and i32* @var32, i32 %inv seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 +; CHECK: ldclral w0, w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + ret i32 %old +} + +define i64 @test_atomic_load_and_i64_inv_arg(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_and_i64_inv_arg: + %inv = xor i64 %offset, -1 + %old = atomicrmw and i64* @var64, i64 %inv seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 +; CHECK: ldclral x0, x[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + ret i64 %old +} + define void @test_atomic_load_and_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret: atomicrmw and i32* @var32, i32 %offset seq_cst