This is an archive of the discontinued LLVM Phabricator instance.

Differential D65422

[GlobalISel][AArch64] Add naive selection for @llvm.aarch64.ldaxr
AbandonedPublic

Authored by paquette on Jul 29 2019, 3:38 PM.

Download Raw Diff

Details

Reviewers

aemerson

Summary

This adds naive instruction selection for the @llvm.aarch64.ldaxr intrinsic.

This will select the right instruction based on the number of bytes being loaded, but it doesn't try to do any folding, etc. This isn't great for code size, but it at least prevents us from falling back when we see this intrinsic.

(Since the instructions we're not folding have already been selected by the time we run into the intrinsic, it's difficult to do this the "right" way during selection.)

Add a test for the intrinsic (select-ldaxr-intrin.mir) and update arm64-ldxr-stxr.ll as well.

Diff Detail

Event Timeline

paquette created this revision.Jul 29 2019, 3:38 PM

Herald added a project: Restricted Project. · View Herald TranscriptJul 29 2019, 3:38 PM

Herald added subscribers: Petar.Avramovic, hiraditya, kristof.beyls and 2 others. · View Herald Transcript

I'd prefer to do this by extending the emitter. After D65756 lands, we should be able to get the 64-bit load intrinsics this way.

However, we won't be able to get the smaller ones, since their patterns use SUBREG_TO_REG. The emitter doesn't know how to support that yet.

Abandoning this because D66897 does it in a much better way. Once we import SUBREG_TO_REG (D66769), we can just import all of these.

Revision Contents

Path

Size

llvm/

lib/

Target/

AArch64/

AArch64InstructionSelector.cpp

53 lines

test/

CodeGen/

AArch64/

GlobalISel/

select-ldaxr-intrin.mir

107 lines

arm64-ldxr-stxr.ll

48 lines

Diff 212241

llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

Show First 20 Lines • Show All 3,919 Lines • ▼ Show 20 Lines	bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
case Intrinsic::trap:		case Intrinsic::trap:
MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);		MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
break;		break;
case Intrinsic::debugtrap:		case Intrinsic::debugtrap:
if (!STI.isTargetWindows())		if (!STI.isTargetWindows())
return false;		return false;
MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);		MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
break;		break;
		case Intrinsic::aarch64_ldaxr: {
		  // Naive selection for @llvm.aarch64.ldaxr: pick LDAXRB/H/W/X from the
		  // size of the attached memory operand. No folding of surrounding
		  // instructions is attempted.

		  // The load width is only known through the memory operand; without one
		  // we cannot choose an opcode.
		  if (I.memoperands_empty())
		    return false;
		  MachineMemOperand *MemOp = *I.memoperands_begin();
		  unsigned NumBytesToLoad = MemOp->getSize();

		  // Don't handle loads bigger than 64 bits. Also reject sizes that are
		  // zero or not a power of two: the opcode table below is indexed by
		  // log2(size), so any other size would pick the wrong opcode or index
		  // out of bounds.
		  if (NumBytesToLoad > 8 || !isPowerOf2_32(NumBytesToLoad))
		    return false;

		  Register OrigDst = I.getOperand(0).getReg();
		  Register SrcReg = I.getOperand(2).getReg();

		  // We can only do this for GPRs.
		  const RegisterBank &RB = *RBI.getRegBank(OrigDst, MRI, TRI);
		  if (RB.getID() == AArch64::FPRRegBankID)
		    return false;

		  // This intrinsic always has an s64 as its destination, even when we aren't
		  // loading 64 bits. If we aren't loading 64 bits, then we should do the
		  // load on a smaller (32-bit) register, which is created directly in
		  // GPR32 and so needs no further constraining.
		  const bool IsNarrowLoad = NumBytesToLoad < 8;
		  Register LoadDst = OrigDst;
		  if (IsNarrowLoad)
		    LoadDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);

		  // Emit the load instruction. The table is indexed by log2 of the number
		  // of bytes loaded (1, 2, 4, 8).
		  static const unsigned OpcTable[4] = {AArch64::LDAXRB, AArch64::LDAXRH,
		                                       AArch64::LDAXRW, AArch64::LDAXRX};
		  unsigned Opc = OpcTable[Log2_32(NumBytesToLoad)];
		  // Hold the builder by value: addMemOperand returns a reference to the
		  // temporary builder, which would dangle if bound to a reference here.
		  auto LoadMI =
		      MIRBuilder.buildInstr(Opc, {LoadDst}, {SrcReg}).addMemOperand(MemOp);

		  if (IsNarrowLoad) {
		    // Not loading into a 64-bit register. Use SUBREG_TO_REG to get back into
		    // 64-bits.
		    // FIXME: At this point, we may have already selected some redundant
		    // instructions following the load. We should either recognize this
		    // intrinsic earlier, or we should match those instructions if possible.
		    auto SubRegToReg =
		        MIRBuilder.buildInstr(AArch64::SUBREG_TO_REG, {OrigDst}, {})
		            .addImm(0)
		            .addUse(LoadDst)
		            .addImm(AArch64::sub_32);
		    // Make sure the original destination is properly constrained.
		    RBI.constrainGenericRegister(OrigDst, AArch64::GPR64RegClass, MRI);
		    constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI);
		  }
		  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
		  break;
		}
case Intrinsic::aarch64_stlxr:		case Intrinsic::aarch64_stlxr:
Register StatReg = I.getOperand(0).getReg();		Register StatReg = I.getOperand(0).getReg();
assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&		assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
"Status register must be 32 bits!");		"Status register must be 32 bits!");
Register SrcReg = I.getOperand(2).getReg();		Register SrcReg = I.getOperand(2).getReg();

if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {		if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");		LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
▲ Show 20 Lines • Show All 478 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/GlobalISel/select-ldaxr-intrin.mir

This file was added.

				# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
				# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s

				--- |
				define void @test_load_acquire_i8(i8* %addr) { ret void }
				define void @test_load_acquire_i16(i16* %addr) { ret void }
				define void @test_load_acquire_i32(i32* %addr) { ret void }
				define void @test_load_acquire_i64(i64* %addr) { ret void }

				...
				---
				name: test_load_acquire_i8
				alignment: 2
				legalized: true
				regBankSelected: true
				tracksRegLiveness: true
				body: |
				bb.0:
				liveins: $x0

				; CHECK-LABEL: name: test_load_acquire_i8
				; CHECK: liveins: $x0
				; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
				; CHECK: [[LDAXRB:%[0-9]+]]:gpr32 = LDAXRB [[COPY]] :: (volatile load 1 from %ir.addr)
				; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64common = SUBREG_TO_REG 0, [[LDAXRB]], %subreg.sub_32
				; CHECK: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[SUBREG_TO_REG]].sub_32
				; CHECK: $w0 = COPY [[COPY1]]
				; CHECK: RET_ReallyLR implicit $w0
				%0:gpr(p0) = COPY $x0
				%1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load 1 from %ir.addr)
				%3:gpr(s32) = G_TRUNC %1(s64)
				$w0 = COPY %3(s32)
				RET_ReallyLR implicit $w0

				...
				---
				name: test_load_acquire_i16
				alignment: 2
				legalized: true
				regBankSelected: true
				tracksRegLiveness: true
				body: |
				bb.0:
				liveins: $x0

				; CHECK-LABEL: name: test_load_acquire_i16
				; CHECK: liveins: $x0
				; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
				; CHECK: [[LDAXRH:%[0-9]+]]:gpr32 = LDAXRH [[COPY]] :: (volatile load 2 from %ir.addr)
				; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64common = SUBREG_TO_REG 0, [[LDAXRH]], %subreg.sub_32
				; CHECK: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[SUBREG_TO_REG]].sub_32
				; CHECK: $w0 = COPY [[COPY1]]
				; CHECK: RET_ReallyLR implicit $w0
				%0:gpr(p0) = COPY $x0
				%1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load 2 from %ir.addr)
				%3:gpr(s32) = G_TRUNC %1(s64)
				$w0 = COPY %3(s32)
				RET_ReallyLR implicit $w0

				...
				---
				name: test_load_acquire_i32
				alignment: 2
				legalized: true
				regBankSelected: true
				tracksRegLiveness: true
				body: |
				bb.0:
				liveins: $x0

				; CHECK-LABEL: name: test_load_acquire_i32
				; CHECK: liveins: $x0
				; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
				; CHECK: [[LDAXRW:%[0-9]+]]:gpr32 = LDAXRW [[COPY]] :: (volatile load 4 from %ir.addr)
				; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64common = SUBREG_TO_REG 0, [[LDAXRW]], %subreg.sub_32
				; CHECK: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[SUBREG_TO_REG]].sub_32
				; CHECK: $w0 = COPY [[COPY1]]
				; CHECK: RET_ReallyLR implicit $w0
				%0:gpr(p0) = COPY $x0
				%1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load 4 from %ir.addr)
				%2:gpr(s32) = G_TRUNC %1(s64)
				$w0 = COPY %2(s32)
				RET_ReallyLR implicit $w0

				...
				---
				name: test_load_acquire_i64
				alignment: 2
				legalized: true
				regBankSelected: true
				tracksRegLiveness: true
				body: |
				bb.0:
				liveins: $x0

				; CHECK-LABEL: name: test_load_acquire_i64
				; CHECK: liveins: $x0
				; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
				; CHECK: [[LDAXRX:%[0-9]+]]:gpr64 = LDAXRX [[COPY]] :: (volatile load 8 from %ir.addr)
				; CHECK: $x0 = COPY [[LDAXRX]]
				; CHECK: RET_ReallyLR implicit $x0
				%0:gpr(p0) = COPY $x0
				%1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load 8 from %ir.addr)
				$x0 = COPY %1(s64)
				RET_ReallyLR implicit $x0

				...

llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll

Show First 20 Lines • Show All 164 Lines • ▼ Show 20 Lines	entry:
%tmp7 = trunc i128 %tmp6 to i64		%tmp7 = trunc i128 %tmp6 to i64
%strexd = tail call i32 @llvm.aarch64.stlxp(i64 %tmp4, i64 %tmp7, i8* %ptr)		%strexd = tail call i32 @llvm.aarch64.stlxp(i64 %tmp4, i64 %tmp7, i8* %ptr)
ret i32 %strexd		ret i32 %strexd
}		}

declare %0 @llvm.aarch64.ldaxp(i8*) nounwind		declare %0 @llvm.aarch64.ldaxp(i8*) nounwind
declare i32 @llvm.aarch64.stlxp(i64, i64, i8*) nounwind		declare i32 @llvm.aarch64.stlxp(i64, i64, i8*) nounwind

		; FALLBACK-NOT: remark:{{.*}}test_load_acquire_i8
define void @test_load_acquire_i8(i8* %addr) {		define void @test_load_acquire_i8(i8* %addr) {
; CHECK-LABEL: test_load_acquire_i8:		; CHECK-LABEL: test_load_acquire_i8:
; CHECK: ldaxrb w[[LOADVAL:[0-9]+]], [x0]		; CHECK: ldaxrb w[[LOADVAL:[0-9]+]], [x0]
; CHECK-NOT: uxtb		; CHECK-NOT: uxtb
; CHECK-NOT: and		; CHECK-NOT: and
; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]		; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]

		; FIXME: Global ISel has to emit a bunch of pointless instructions here right
		; now. We should teach it to recognize that the result is going to be on a
		; 32-bit register.
		; FIXME: Global ISel doesn't know how to handle the store addressing mode here,
		; so it will emit a normal store.

		; GISEL-LABEL: test_load_acquire_i8:
		; GISEL: ldaxrb w[[#LOADVAL:]], [x0]
		; GISEL-NEXT: adrp x[[#ADDRESS:]], var
		; GISEL-NEXT: mov x[[#MASK:]], #255
		; GISEL-NEXT: add x[[#ADDRESS]], x[[#ADDRESS]], :lo12:var
		; GISEL-NEXT: and x[[#LOADVAL]], x[[#LOADVAL]], x[[#MASK]]
		; GISEL-NEXT: str x[[#LOADVAL]], [x[[#ADDRESS]]]
%val = call i64 @llvm.aarch64.ldaxr.p0i8(i8* %addr)		%val = call i64 @llvm.aarch64.ldaxr.p0i8(i8* %addr)
%shortval = trunc i64 %val to i8		%shortval = trunc i64 %val to i8
%extval = zext i8 %shortval to i64		%extval = zext i8 %shortval to i64
store i64 %extval, i64* @var, align 8		store i64 %extval, i64* @var, align 8
ret void		ret void
}		}

		; FALLBACK-NOT: remark:{{.*}}test_load_acquire_i16
define void @test_load_acquire_i16(i16* %addr) {		define void @test_load_acquire_i16(i16* %addr) {
; CHECK-LABEL: test_load_acquire_i16:		; CHECK-LABEL: test_load_acquire_i16:
; CHECK: ldaxrh w[[LOADVAL:[0-9]+]], [x0]		; CHECK: ldaxrh w[[LOADVAL:[0-9]+]], [x0]
; CHECK-NOT: uxth		; CHECK-NOT: uxth
; CHECK-NOT: and		; CHECK-NOT: and
; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]		; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]

		; FIXME: Global ISel has to emit a bunch of pointless instructions here right
		; now. We should teach it to recognize that the result is going to be on a
		; 32-bit register.
		; FIXME: Global ISel doesn't know how to handle the store addressing mode here,
		; so it will emit a normal store.

		; GISEL-LABEL: test_load_acquire_i16:
		; GISEL: ldaxrh w[[#LOADVAL:]], [x0]
		; GISEL-NEXT: adrp x[[#ADDRESS:]], var
		; GISEL-NEXT: mov x[[#MASK:]], #65535
		; GISEL-NEXT: add x[[#ADDRESS]], x[[#ADDRESS]], :lo12:var
		; GISEL-NEXT: and x[[#LOADVAL]], x[[#LOADVAL]], x[[#MASK]]
		; GISEL-NEXT: str x[[#LOADVAL]], [x[[#ADDRESS]]]
%val = call i64 @llvm.aarch64.ldaxr.p0i16(i16* %addr)		%val = call i64 @llvm.aarch64.ldaxr.p0i16(i16* %addr)
%shortval = trunc i64 %val to i16		%shortval = trunc i64 %val to i16
%extval = zext i16 %shortval to i64		%extval = zext i16 %shortval to i64
store i64 %extval, i64* @var, align 8		store i64 %extval, i64* @var, align 8
ret void		ret void
}		}

		; FALLBACK-NOT: remark:{{.*}}test_load_acquire_i32
define void @test_load_acquire_i32(i32* %addr) {		define void @test_load_acquire_i32(i32* %addr) {
; CHECK-LABEL: test_load_acquire_i32:		; CHECK-LABEL: test_load_acquire_i32:
; CHECK: ldaxr w[[LOADVAL:[0-9]+]], [x0]		; CHECK: ldaxr w[[LOADVAL:[0-9]+]], [x0]
; CHECK-NOT: uxtw		; CHECK-NOT: uxtw
; CHECK-NOT: and		; CHECK-NOT: and
; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]		; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]

		; FIXME: Global ISel has to emit a bunch of pointless instructions here right
		; now. We should teach it to recognize that the result is going to be on a
		; 32-bit register.
		; FIXME: Global ISel doesn't know how to handle the store addressing mode here,
		; so it will emit a normal store.

		; GISEL-LABEL: test_load_acquire_i32:
		; GISEL: ldaxr w[[#LOADVAL:]], [x0]
		; GISEL-NEXT: adrp x[[#ADDRESS:]], var
		; GISEL-NEXT: mov x[[#MASK:]], #4294967295
		; GISEL-NEXT: add x[[#ADDRESS]], x[[#ADDRESS]], :lo12:var
		; GISEL-NEXT: and x[[#LOADVAL]], x[[#LOADVAL]], x[[#MASK]]
		; GISEL-NEXT: str x[[#LOADVAL]], [x[[#ADDRESS]]]
%val = call i64 @llvm.aarch64.ldaxr.p0i32(i32* %addr)		%val = call i64 @llvm.aarch64.ldaxr.p0i32(i32* %addr)
%shortval = trunc i64 %val to i32		%shortval = trunc i64 %val to i32
%extval = zext i32 %shortval to i64		%extval = zext i32 %shortval to i64
store i64 %extval, i64* @var, align 8		store i64 %extval, i64* @var, align 8
ret void		ret void
}		}

		; FALLBACK-NOT: remark:{{.*}}test_load_acquire_i64
define void @test_load_acquire_i64(i64* %addr) {		define void @test_load_acquire_i64(i64* %addr) {
; CHECK-LABEL: test_load_acquire_i64:		; CHECK-LABEL: test_load_acquire_i64:
; CHECK: ldaxr x[[LOADVAL:[0-9]+]], [x0]		; CHECK: ldaxr x[[LOADVAL:[0-9]+]], [x0]
; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]		; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]

		; GISEL-LABEL: test_load_acquire_i64:
		; GISEL: ldaxr x[[#LOADVAL:]], [x0]
		; GISEL-NEXT: adrp x[[#ADDRESS:]], var
		; GISEL-NEXT: add x[[#ADDRESS]], x[[#ADDRESS]], :lo12:var
		; GISEL-NEXT: str x[[#LOADVAL]], [x[[#ADDRESS]]]
%val = call i64 @llvm.aarch64.ldaxr.p0i64(i64* %addr)		%val = call i64 @llvm.aarch64.ldaxr.p0i64(i64* %addr)
store i64 %val, i64* @var, align 8		store i64 %val, i64* @var, align 8
ret void		ret void
}		}


declare i64 @llvm.aarch64.ldaxr.p0i8(i8*) nounwind		declare i64 @llvm.aarch64.ldaxr.p0i8(i8*) nounwind
declare i64 @llvm.aarch64.ldaxr.p0i16(i16*) nounwind		declare i64 @llvm.aarch64.ldaxr.p0i16(i16*) nounwind
▲ Show 20 Lines • Show All 52 Lines • Show Last 20 Lines