Index: llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -3925,6 +3925,59 @@
       return false;
     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
     break;
+  case Intrinsic::aarch64_ldaxr: {
+    Register DstReg = I.getOperand(0).getReg();
+    Register SrcReg = I.getOperand(2).getReg();
+    MachineMemOperand *MemOp = *I.memoperands_begin();
+    unsigned NumBytesToLoad = MemOp->getSize();
+
+    // Don't handle loads bigger than 64 bits.
+    if (NumBytesToLoad > 8)
+      return false;
+
+    // We can only do this for GPRs.
+    const RegisterBank &RB = *RBI.getRegBank(DstReg, MRI, TRI);
+    if (RB.getID() == AArch64::FPRRegBankID)
+      return false;
+
+    // This intrinsic always has an s64 as its destination, even when we aren't
+    // loading 64 bits. If we aren't loading 64 bits, then we should do the
+    // load on a smaller register.
+    unsigned NumBitsToLoad = NumBytesToLoad * 8;
+    if (NumBitsToLoad < 64) {
+      // We're loading something smaller. Use a 32-bit register.
+      DstReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+      RBI.constrainGenericRegister(DstReg, AArch64::GPR32RegClass, MRI);
+    }
+
+    // Emit the load instruction.
+    static const unsigned OpcTable[4] = {AArch64::LDAXRB, AArch64::LDAXRH,
+                                         AArch64::LDAXRW, AArch64::LDAXRX};
+    unsigned Opc = OpcTable[Log2_32(NumBytesToLoad)];
+    auto LoadMI =
+        MIRBuilder.buildInstr(Opc, {DstReg}, {SrcReg}).addMemOperand(MemOp);
+
+    // Check if we're loading into a 64-bit register.
+    if (NumBitsToLoad < 64) {
+      // Not loading into a 64-bit register. Use SUBREG_TO_REG to get back to
+      // 64 bits.
+      // FIXME: At this point, we may have already selected some redundant
+      // instructions following the load. We should either recognize this
+      // intrinsic earlier, or we should match those instructions if possible.
+      Register OrigDst = I.getOperand(0).getReg();
+      auto SubRegToReg =
+          MIRBuilder.buildInstr(AArch64::SUBREG_TO_REG, {OrigDst}, {})
+              .addImm(0)
+              .addUse(DstReg)
+              .addImm(AArch64::sub_32);
+      // Make sure the original destination is properly constrained.
+      RBI.constrainGenericRegister(OrigDst,
+                                   AArch64::GPR64RegClass, MRI);
+      constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI);
+    }
+    constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
+    break;
+  }
   case Intrinsic::aarch64_stlxr:
     Register StatReg = I.getOperand(0).getReg();
     assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-ldaxr-intrin.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-ldaxr-intrin.mir
@@ -0,0 +1,107 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+  define void @test_load_acquire_i8(i8* %addr) { ret void }
+  define void @test_load_acquire_i16(i16* %addr) { ret void }
+  define void @test_load_acquire_i32(i32* %addr) { ret void }
+  define void @test_load_acquire_i64(i64* %addr) { ret void }
+
+...
+---
+name: test_load_acquire_i8
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test_load_acquire_i8
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: [[LDAXRB:%[0-9]+]]:gpr32 = LDAXRB [[COPY]] :: (volatile load 1 from %ir.addr)
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64common = SUBREG_TO_REG 0, [[LDAXRB]], %subreg.sub_32
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[SUBREG_TO_REG]].sub_32
+    ; CHECK: $w0 = COPY [[COPY1]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load 1 from %ir.addr)
+    %3:gpr(s32) = G_TRUNC %1(s64)
+    $w0 = COPY %3(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: test_load_acquire_i16
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test_load_acquire_i16
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: [[LDAXRH:%[0-9]+]]:gpr32 = LDAXRH [[COPY]] :: (volatile load 2 from %ir.addr)
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64common = SUBREG_TO_REG 0, [[LDAXRH]], %subreg.sub_32
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[SUBREG_TO_REG]].sub_32
+    ; CHECK: $w0 = COPY [[COPY1]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load 2 from %ir.addr)
+    %3:gpr(s32) = G_TRUNC %1(s64)
+    $w0 = COPY %3(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: test_load_acquire_i32
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test_load_acquire_i32
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: [[LDAXRW:%[0-9]+]]:gpr32 = LDAXRW [[COPY]] :: (volatile load 4 from %ir.addr)
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64common = SUBREG_TO_REG 0, [[LDAXRW]], %subreg.sub_32
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[SUBREG_TO_REG]].sub_32
+    ; CHECK: $w0 = COPY [[COPY1]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load 4 from %ir.addr)
+    %2:gpr(s32) = G_TRUNC %1(s64)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: test_load_acquire_i64
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test_load_acquire_i64
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: [[LDAXRX:%[0-9]+]]:gpr64 = LDAXRX [[COPY]] :: (volatile load 8 from %ir.addr)
+    ; CHECK: $x0 = COPY [[LDAXRX]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load 8 from %ir.addr)
+    $x0 = COPY %1(s64)
+    RET_ReallyLR implicit $x0
+
+...
Index: llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
+++ llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
@@ -170,6 +170,7 @@
 declare %0 @llvm.aarch64.ldaxp(i8*) nounwind
 declare i32 @llvm.aarch64.stlxp(i64, i64, i8*) nounwind
 
+; FALLBACK-NOT: remark:{{.*}}test_load_acquire_i8
 define void @test_load_acquire_i8(i8* %addr) {
 ; CHECK-LABEL: test_load_acquire_i8:
 ; CHECK: ldaxrb w[[LOADVAL:[0-9]+]], [x0]
@@ -177,6 +178,19 @@
 ; CHECK-NOT: and
 ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
 
+; FIXME: Global ISel has to emit a bunch of pointless instructions here right
+; now. We should teach it to recognize that the result is going to be on a
+; 32-bit register.
+; FIXME: Global ISel doesn't know how to handle the store addressing mode here,
+; so it will emit a normal store.
+
+; GISEL-LABEL: test_load_acquire_i8:
+; GISEL: ldaxrb w[[#LOADVAL:]], [x0]
+; GISEL-NEXT: adrp x[[#ADDRESS:]], var
+; GISEL-NEXT: mov x[[#MASK:]], #255
+; GISEL-NEXT: add x[[#ADDRESS]], x[[#ADDRESS]], :lo12:var
+; GISEL-NEXT: and x[[#LOADVAL]], x[[#LOADVAL]], x[[#MASK]]
+; GISEL-NEXT: str x[[#LOADVAL]], [x[[#ADDRESS]]]
   %val = call i64 @llvm.aarch64.ldaxr.p0i8(i8* %addr)
   %shortval = trunc i64 %val to i8
   %extval = zext i8 %shortval to i64
@@ -184,6 +198,7 @@
   ret void
 }
 
+; FALLBACK-NOT: remark:{{.*}}test_load_acquire_i16
 define void @test_load_acquire_i16(i16* %addr) {
 ; CHECK-LABEL: test_load_acquire_i16:
 ; CHECK: ldaxrh w[[LOADVAL:[0-9]+]], [x0]
@@ -191,6 +206,19 @@
 ; CHECK-NOT: and
 ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
 
+; FIXME: Global ISel has to emit a bunch of pointless instructions here right
+; now. We should teach it to recognize that the result is going to be on a
+; 32-bit register.
+; FIXME: Global ISel doesn't know how to handle the store addressing mode here,
+; so it will emit a normal store.
+
+; GISEL-LABEL: test_load_acquire_i16:
+; GISEL: ldaxrh w[[#LOADVAL:]], [x0]
+; GISEL-NEXT: adrp x[[#ADDRESS:]], var
+; GISEL-NEXT: mov x[[#MASK:]], #65535
+; GISEL-NEXT: add x[[#ADDRESS]], x[[#ADDRESS]], :lo12:var
+; GISEL-NEXT: and x[[#LOADVAL]], x[[#LOADVAL]], x[[#MASK]]
+; GISEL-NEXT: str x[[#LOADVAL]], [x[[#ADDRESS]]]
   %val = call i64 @llvm.aarch64.ldaxr.p0i16(i16* %addr)
   %shortval = trunc i64 %val to i16
   %extval = zext i16 %shortval to i64
@@ -198,6 +226,7 @@
   ret void
 }
 
+; FALLBACK-NOT: remark:{{.*}}test_load_acquire_i32
 define void @test_load_acquire_i32(i32* %addr) {
 ; CHECK-LABEL: test_load_acquire_i32:
 ; CHECK: ldaxr w[[LOADVAL:[0-9]+]], [x0]
@@ -205,6 +234,19 @@
 ; CHECK-NOT: and
 ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
 
+; FIXME: Global ISel has to emit a bunch of pointless instructions here right
+; now. We should teach it to recognize that the result is going to be on a
+; 32-bit register.
+; FIXME: Global ISel doesn't know how to handle the store addressing mode here,
+; so it will emit a normal store.
+
+; GISEL-LABEL: test_load_acquire_i32:
+; GISEL: ldaxr w[[#LOADVAL:]], [x0]
+; GISEL-NEXT: adrp x[[#ADDRESS:]], var
+; GISEL-NEXT: mov x[[#MASK:]], #4294967295
+; GISEL-NEXT: add x[[#ADDRESS]], x[[#ADDRESS]], :lo12:var
+; GISEL-NEXT: and x[[#LOADVAL]], x[[#LOADVAL]], x[[#MASK]]
+; GISEL-NEXT: str x[[#LOADVAL]], [x[[#ADDRESS]]]
   %val = call i64 @llvm.aarch64.ldaxr.p0i32(i32* %addr)
   %shortval = trunc i64 %val to i32
   %extval = zext i32 %shortval to i64
@@ -212,11 +254,17 @@
   ret void
 }
 
+; FALLBACK-NOT: remark:{{.*}}test_load_acquire_i64
 define void @test_load_acquire_i64(i64* %addr) {
 ; CHECK-LABEL: test_load_acquire_i64:
 ; CHECK: ldaxr x[[LOADVAL:[0-9]+]], [x0]
 ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
 
+; GISEL-LABEL: test_load_acquire_i64:
+; GISEL: ldaxr x[[#LOADVAL:]], [x0]
+; GISEL-NEXT: adrp x[[#ADDRESS:]], var
+; GISEL-NEXT: add x[[#ADDRESS]], x[[#ADDRESS]], :lo12:var
+; GISEL-NEXT: str x[[#LOADVAL]], [x[[#ADDRESS]]]
   %val = call i64 @llvm.aarch64.ldaxr.p0i64(i64* %addr)
   store i64 %val, i64* @var, align 8
   ret void