Index: llvm/trunk/lib/Target/AArch64/AArch64InstrAtomics.td
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrAtomics.td
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -333,22 +333,30 @@
 def stlxr_1 : PatFrag<(ops node:$val, node:$ptr),
                       (int_aarch64_stlxr node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
+}]> {
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }];
+}
 
 def stlxr_2 : PatFrag<(ops node:$val, node:$ptr),
                       (int_aarch64_stlxr node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+}]> {
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }];
+}
 
 def stlxr_4 : PatFrag<(ops node:$val, node:$ptr),
                       (int_aarch64_stlxr node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
+}]> {
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }];
+}
 
 def stlxr_8 : PatFrag<(ops node:$val, node:$ptr),
                       (int_aarch64_stlxr node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
+}]> {
+  let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }];
+}
 
 
 def : Pat<(stlxr_1 GPR64:$val, GPR64sp:$addr),
@@ -438,4 +446,3 @@
 defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">;
 defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
 }
-
Index: llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -3912,23 +3912,6 @@
   return IntrinOp->getIntrinsicID();
 }
 
-/// Helper function to emit the correct opcode for a llvm.aarch64.stlxr
-/// intrinsic.
-static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
-  switch (NumBytesToStore) {
-  // TODO: 1 and 2 byte stores
-  case 4:
-    return AArch64::STLXRW;
-  case 8:
-    return AArch64::STLXRX;
-  default:
-    LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
-                      << NumBytesToStore << ")\n");
-    break;
-  }
-  return 0;
-}
-
 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
     MachineInstr &I, MachineRegisterInfo &MRI) const {
   // Find the intrinsic ID.
@@ -3949,48 +3932,6 @@
       return false;
     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
     break;
-  case Intrinsic::aarch64_stlxr:
-    Register StatReg = I.getOperand(0).getReg();
-    assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
-           "Status register must be 32 bits!");
-    Register SrcReg = I.getOperand(2).getReg();
-
-    if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
-      LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
-      return false;
-    }
-
-    Register PtrReg = I.getOperand(3).getReg();
-    assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
-
-    // Expect only one memory operand.
-    if (!I.hasOneMemOperand())
-      return false;
-
-    const MachineMemOperand *MemOp = *I.memoperands_begin();
-    unsigned NumBytesToStore = MemOp->getSize();
-    unsigned Opc = getStlxrOpcode(NumBytesToStore);
-    if (!Opc)
-      return false;
-    unsigned NumBitsToStore = NumBytesToStore * 8;
-    if (NumBitsToStore != 64) {
-      // The intrinsic always has a 64-bit source, but we might actually want
-      // a differently-sized source for the instruction. Try to get it.
-      // TODO: For 1 and 2-byte stores, this will have a G_AND. For now, let's
-      // just handle 4-byte stores.
-      // TODO: If we don't find a G_ZEXT, we'll have to truncate the value down
-      // to the right size for the STLXR.
-      MachineInstr *Zext = getOpcodeDef(TargetOpcode::G_ZEXT, SrcReg, MRI);
-      if (!Zext)
-        return false;
-      SrcReg = Zext->getOperand(1).getReg();
-      // We should get an appropriately-sized register here.
-      if (RBI.getSizeInBits(SrcReg, MRI, TRI) != NumBitsToStore)
-        return false;
-    }
-    auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg})
-                       .addMemOperand(*I.memoperands_begin());
-    constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
   }
 
   I.eraseFromParent();
Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-stlxr-intrin.mir
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-stlxr-intrin.mir
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-stlxr-intrin.mir
@@ -9,6 +9,9 @@
   define i32 @test_store_release_i32(i32 %a, i64* %addr) {
     ret i32 %a
   }
+
+  define void @test_store_release_i8(i32, i8 %val, i8* %addr) { ret void }
+  define void @test_store_release_i16(i32, i16 %val, i16* %addr) { ret void }
 ...
 ---
 name: test_store_release_i64
@@ -58,3 +61,61 @@
     RET_ReallyLR implicit $w0
 
 ...
+---
+name: test_store_release_i8
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $x2
+
+    ; CHECK-LABEL: name: test_store_release_i8
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x2
+    ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64all = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_32
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[INSERT_SUBREG]].sub_32
+    ; CHECK: early-clobber %5:gpr32 = STLXRB [[COPY2]], [[COPY1]] :: (volatile store 1 into %ir.addr)
+    ; CHECK: $w0 = COPY %5
+    ; CHECK: RET_ReallyLR implicit $w0
+    %3:gpr(s32) = COPY $w1
+    %2:gpr(p0) = COPY $x2
+    %6:gpr(s64) = G_CONSTANT i64 255
+    %7:gpr(s64) = G_ANYEXT %3(s32)
+    %4:gpr(s64) = G_AND %7, %6
+    %5:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stlxr), %4(s64), %2(p0) :: (volatile store 1 into %ir.addr)
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: test_store_release_i16
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $x2
+
+    ; CHECK-LABEL: name: test_store_release_i16
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x2
+    ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64all = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_32
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[INSERT_SUBREG]].sub_32
+    ; CHECK: early-clobber %5:gpr32 = STLXRH [[COPY2]], [[COPY1]] :: (volatile store 2 into %ir.addr)
+    ; CHECK: $w0 = COPY %5
+    ; CHECK: RET_ReallyLR implicit $w0
+    %3:gpr(s32) = COPY $w1
+    %2:gpr(p0) = COPY $x2
+    %6:gpr(s64) = G_CONSTANT i64 65535
+    %7:gpr(s64) = G_ANYEXT %3(s32)
+    %4:gpr(s64) = G_AND %7, %6
+    %5:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stlxr), %4(s64), %2(p0) :: (volatile store 2 into %ir.addr)
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
Index: llvm/trunk/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
+++ llvm/trunk/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
@@ -268,21 +268,31 @@
 declare i64 @llvm.aarch64.ldaxr.p0i32(i32*) nounwind
 declare i64 @llvm.aarch64.ldaxr.p0i64(i64*) nounwind
 
+; FALLBACK-NOT: remark:{{.*}}test_store_release_i8
 define i32 @test_store_release_i8(i32, i8 %val, i8* %addr) {
 ; CHECK-LABEL: test_store_release_i8:
 ; CHECK-NOT: uxtb
 ; CHECK-NOT: and
 ; CHECK: stlxrb w0, w1, [x2]
+; GISEL-LABEL: test_store_release_i8:
+; GISEL-NOT: uxtb
+; GISEL-NOT: and
+; GISEL: stlxrb w0, w1, [x2]
   %extval = zext i8 %val to i64
   %res = call i32 @llvm.aarch64.stlxr.p0i8(i64 %extval, i8* %addr)
   ret i32 %res
 }
 
+; FALLBACK-NOT: remark:{{.*}}test_store_release_i16
 define i32 @test_store_release_i16(i32, i16 %val, i16* %addr) {
 ; CHECK-LABEL: test_store_release_i16:
 ; CHECK-NOT: uxth
 ; CHECK-NOT: and
 ; CHECK: stlxrh w0, w1, [x2]
+; GISEL-LABEL: test_store_release_i16:
+; GISEL-NOT: uxth
+; GISEL-NOT: and
+; GISEL: stlxrh w0, w1, [x2]
   %extval = zext i16 %val to i64
   %res = call i32 @llvm.aarch64.stlxr.p0i16(i64 %extval, i16* %addr)
   ret i32 %res
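
Note: the GISelPredicateCode hooks added to the stlxr_* PatFrags above call an
isLoadStoreOfNumBytes() helper on the AArch64 instruction selector. That helper
is not part of this diff; as a rough sketch (the exact signature and body are
assumptions, not taken from this patch), all it needs to check is that the
intrinsic carries a single memory operand covering the expected number of bytes:

  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/MachineMemOperand.h"

  // Sketch only: true if MI has exactly one memory operand of NumBytes bytes.
  bool AArch64InstructionSelector::isLoadStoreOfNumBytes(const MachineInstr &MI,
                                                         unsigned NumBytes) const {
    if (!MI.hasOneMemOperand())
      return false;
    return (*MI.memoperands_begin())->getSize() == NumBytes;
  }

With such a predicate, the imported stlxr_1/stlxr_2 patterns can match the
@llvm.aarch64.stlxr intrinsic directly, which is why the hand-written
Intrinsic::aarch64_stlxr case in selectIntrinsicWithSideEffects() is removed.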