Index: llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -96,6 +96,8 @@
   bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
   bool selectSplitVectorUnmerge(MachineInstr &I,
                                 MachineRegisterInfo &MRI) const;
+  bool selectIntrinsicWithSideEffects(MachineInstr &I,
+                                      MachineRegisterInfo &MRI) const;
 
   unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
   MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
@@ -1735,14 +1737,7 @@
     return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
                                 : selectVaStartAAPCS(I, MF, MRI);
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
-    if (!I.getOperand(0).isIntrinsicID())
-      return false;
-    if (I.getOperand(0).getIntrinsicID() != Intrinsic::trap)
-      return false;
-    BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::BRK))
-      .addImm(1);
-    I.eraseFromParent();
-    return true;
+    return selectIntrinsicWithSideEffects(I, MRI);
   case TargetOpcode::G_IMPLICIT_DEF: {
     I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
@@ -2705,6 +2700,91 @@
   return true;
 }
 
+/// Map the size of an llvm.aarch64.stlxr store onto the matching STLXR opcode.
+///
+/// \param NumBytesToStore size in bytes of the memory access being selected.
+/// \returns the AArch64 opcode to emit, or 0 when no opcode is available for
+/// that size (only 8-byte stores are handled so far).
+static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
+  switch (NumBytesToStore) {
+  // TODO: 1, 2, and 4 byte stores.
+  case 8:
+    return AArch64::STLXRX;
+  default:
+    LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
+                      << NumBytesToStore << ")\n");
+    break;
+  }
+  return 0;
+}
+
+/// Select a G_INTRINSIC_W_SIDE_EFFECTS instruction.
+///
+/// Handles llvm.trap (emitted as BRK #1) and llvm.aarch64.stlxr with a 64-bit
+/// source and a single 8-byte memory operand (emitted as STLXRX).
+///
+/// \returns true if \p I was replaced by a selected instruction and erased;
+/// false if the intrinsic is unsupported, in which case \p I is left in place.
+bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  // Find the intrinsic ID.
+  auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
+    return Op.isIntrinsicID();
+  });
+  if (IntrinOp == I.operands_end())
+    return false;
+  unsigned IntrinID = IntrinOp->getIntrinsicID();
+  MachineBasicBlock &MBB = *I.getParent();
+  MachineIRBuilder MIRBuilder(I);
+  // Make sure instructions created through MIRBuilder carry I's debug
+  // location, matching the BuildMI call below.
+  MIRBuilder.setDebugLoc(I.getDebugLoc());
+
+  // Select the instruction.
+  switch (IntrinID) {
+  default:
+    return false;
+  case Intrinsic::trap:
+    BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::BRK)).addImm(1);
+    break;
+  case Intrinsic::aarch64_stlxr: {
+    // Braces scope the locals below so that further cases can be added after
+    // this one without jumping over their initialization.
+    // Operands used here: 0 = status result, 2 = value to store, 3 = address.
+    unsigned StatReg = I.getOperand(0).getReg();
+    assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
+           "Status register must be 32 bits!");
+    unsigned SrcReg = I.getOperand(2).getReg();
+
+    if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
+      LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
+      return false;
+    }
+
+    unsigned PtrReg = I.getOperand(3).getReg();
+    assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
+
+    // Expect only one memory operand.
+    if (!I.hasOneMemOperand())
+      return false;
+
+    // The store width comes from the memory operand, not from the register.
+    const MachineMemOperand *MemOp = *I.memoperands_begin();
+    unsigned NumBytesToStore = MemOp->getSize();
+    unsigned Opc = getStlxrOpcode(NumBytesToStore);
+    if (!Opc)
+      return false;
+
+    auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg});
+    constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
+    break;
+  }
+  }
+
+  I.eraseFromParent();
+  return true;
+}
+
 /// SelectArithImmed - Select an immediate value that can be represented as
 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
 /// Val set to the 12-bit value and Shift set to the shifter operand.
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-stlxr-intrin.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-stlxr-intrin.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+# Check that llvm.aarch64.stlxr with an s64 value and an 8-byte memory operand
+# is selected to STLXRX.
+
+--- |
+  define i32 @test_store_release_i64(i32 %a, i64* %addr) {
+    ret i32 %a
+  }
+...
+---
+name: test_store_release_i64
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $x1, $x2
+
+    ; CHECK-LABEL: name: test_store_release_i64
+    ; CHECK: liveins: $w0, $x1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x2
+    ; CHECK: early-clobber %2:gpr32 = STLXRX [[COPY]], [[COPY1]]
+    ; CHECK: $w0 = COPY %2
+    ; CHECK: RET_ReallyLR implicit $w0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(p0) = COPY $x2
+    %3:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stlxr), %1(s64), %2(p0) :: (volatile store 8 into %ir.addr)
+    $w0 = COPY %3(s32)
+    RET_ReallyLR implicit $w0
+
+...
Index: llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
+++ llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s
+; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mtriple=arm64-linux-gnu 2>&1 | FileCheck %s --check-prefixes=GISEL,FALLBACK
 
 %0 = type { i64, i64 }
 
@@ -257,9 +258,13 @@
   ret i32 %res
 }
 
+; The GlobalISel run must not report a missed remark naming this function.
+; FALLBACK-NOT: remark:{{.*}}test_store_release_i64
 define i32 @test_store_release_i64(i32, i64 %val, i64* %addr) {
 ; CHECK-LABEL: test_store_release_i64:
 ; CHECK: stlxr w0, x1, [x2]
+; GISEL-LABEL: test_store_release_i64:
+; GISEL: stlxr w0, x1, [x2]
   %res = call i32 @llvm.aarch64.stlxr.p0i64(i64 %val, i64* %addr)
   ret i32 %res
 }