Index: llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -3892,7 +3892,9 @@
 /// intrinsic.
 static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
   switch (NumBytesToStore) {
-  // TODO: 1, 2, and 4 byte stores.
+  // TODO: 1 and 2 byte stores
+  case 4:
+    return AArch64::STLXRW;
   case 8:
     return AArch64::STLXRX;
   default:
@@ -3946,8 +3948,24 @@
     unsigned Opc = getStlxrOpcode(NumBytesToStore);
     if (!Opc)
      return false;
-
-    auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg});
+    unsigned NumBitsToStore = NumBytesToStore * 8;
+    if (NumBitsToStore != 64) {
+      // The intrinsic always has a 64-bit source, but we might actually want
+      // a differently-sized source for the instruction. Try to get it.
+      // TODO: For 1 and 2-byte stores, this will have a G_AND. For now, let's
+      // just handle 4-byte stores.
+      // TODO: If we don't find a G_ZEXT, we'll have to truncate the value down
+      // to the right size for the STLXR.
+      MachineInstr *Zext = getOpcodeDef(TargetOpcode::G_ZEXT, SrcReg, MRI);
+      if (!Zext)
+        return false;
+      SrcReg = Zext->getOperand(1).getReg();
+      // We should get an appropriately-sized register here.
+      if (RBI.getSizeInBits(SrcReg, MRI, TRI) != NumBitsToStore)
+        return false;
+    }
+    auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg})
+                       .addMemOperand(*I.memoperands_begin());
     constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
   }
 
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-stlxr-intrin.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/select-stlxr-intrin.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-stlxr-intrin.mir
@@ -5,6 +5,10 @@
   define i32 @test_store_release_i64(i32 %a, i64* %addr) {
     ret i32 %a
   }
+
+  define i32 @test_store_release_i32(i32 %a, i64* %addr) {
+    ret i32 %a
+  }
 ...
 ---
 name: test_store_release_i64
@@ -20,7 +24,7 @@
     ; CHECK: liveins: $w0, $x1, $x2
     ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
     ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x2
-    ; CHECK: early-clobber %2:gpr32 = STLXRX [[COPY]], [[COPY1]]
+    ; CHECK: early-clobber %2:gpr32 = STLXRX [[COPY]], [[COPY1]] :: (volatile store 8 into %ir.addr)
     ; CHECK: $w0 = COPY %2
     ; CHECK: RET_ReallyLR implicit $w0
     %1:gpr(s64) = COPY $x1
@@ -30,3 +34,27 @@
     RET_ReallyLR implicit $w0
 
 ...
+---
+name: test_store_release_i32
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $x2
+    ; CHECK-LABEL: name: test_store_release_i32
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x2
+    ; CHECK: early-clobber %3:gpr32 = STLXRW [[COPY]], [[COPY1]] :: (volatile store 4 into %ir.addr)
+    ; CHECK: $w0 = COPY %3
+    ; CHECK: RET_ReallyLR implicit $w0
+    %1:gpr(s32) = COPY $w1
+    %2:gpr(p0) = COPY $x2
+    %3:gpr(s64) = G_ZEXT %1(s32)
+    %4:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stlxr), %3(s64), %2(p0) :: (volatile store 4 into %ir.addr)
+    $w0 = COPY %4(s32)
+    RET_ReallyLR implicit $w0
+
+...
Index: llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
+++ llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
@@ -248,11 +248,16 @@
   ret i32 %res
 }
 
+; FALLBACK-NOT: remark:{{.*}}test_store_release_i32
 define i32 @test_store_release_i32(i32, i32 %val, i32* %addr) {
 ; CHECK-LABEL: test_store_release_i32:
 ; CHECK-NOT: uxtw
 ; CHECK-NOT: and
 ; CHECK: stlxr w0, w1, [x2]
+; GISEL-LABEL: test_store_release_i32:
+; GISEL-NOT: uxtw
+; GISEL-NOT: and
+; GISEL: stlxr w0, w1, [x2]
   %extval = zext i32 %val to i64
   %res = call i32 @llvm.aarch64.stlxr.p0i32(i64 %extval, i32* %addr)
   ret i32 %res