Index: llvm/trunk/lib/Target/AArch64/AArch64InstrAtomics.td =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64InstrAtomics.td +++ llvm/trunk/lib/Target/AArch64/AArch64InstrAtomics.td @@ -237,19 +237,27 @@ def ldaxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }]; +} def ldaxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }]; +} def ldaxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }]; +} def ldaxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }]; +} def : Pat<(ldaxr_1 GPR64sp:$addr), (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>; Index: llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -242,6 +242,9 @@ MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const; + /// Return true if \p MI is a load or store of \p NumBytes bytes. 
+ bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const; + const AArch64TargetMachine &TM; const AArch64Subtarget &STI; const AArch64InstrInfo &TII; @@ -4552,6 +4555,15 @@ MIB.addImm(Enc); } +bool AArch64InstructionSelector::isLoadStoreOfNumBytes( + const MachineInstr &MI, unsigned NumBytes) const { + if (!MI.mayLoadOrStore()) + return false; + assert(MI.hasOneMemOperand() && + "Expected load/store to have only one mem op!"); + return (*MI.memoperands_begin())->getSize() == NumBytes; +} + namespace llvm { InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &TM, Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-ldaxr-intrin.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-ldaxr-intrin.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-ldaxr-intrin.mir @@ -0,0 +1,94 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +--- | + define void @test_load_acquire_i8(i8* %addr) { ret void } + define void @test_load_acquire_i16(i16* %addr) { ret void } + define void @test_load_acquire_i32(i32* %addr) { ret void } + define void @test_load_acquire_i64(i64* %addr) { ret void } +... 
+--- + +name: test_load_acquire_i8 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: test_load_acquire_i8 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[LDAXRB:%[0-9]+]]:gpr32 = LDAXRB [[COPY]] :: (volatile load 1 from %ir.addr) + ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[LDAXRB]], %subreg.sub_32 + ; CHECK: $x1 = COPY [[SUBREG_TO_REG]] + ; CHECK: RET_ReallyLR implicit $x1 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load 1 from %ir.addr) + $x1 = COPY %1 + RET_ReallyLR implicit $x1 + +... +--- +name: test_load_acquire_i16 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: test_load_acquire_i16 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[LDAXRH:%[0-9]+]]:gpr32 = LDAXRH [[COPY]] :: (volatile load 2 from %ir.addr) + ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[LDAXRH]], %subreg.sub_32 + ; CHECK: $x1 = COPY [[SUBREG_TO_REG]] + ; CHECK: RET_ReallyLR implicit $x1 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load 2 from %ir.addr) + $x1 = COPY %1 + RET_ReallyLR implicit $x1 + +... 
+--- +name: test_load_acquire_i32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: test_load_acquire_i32 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[LDAXRW:%[0-9]+]]:gpr32 = LDAXRW [[COPY]] :: (volatile load 4 from %ir.addr) + ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[LDAXRW]], %subreg.sub_32 + ; CHECK: $x1 = COPY [[SUBREG_TO_REG]] + ; CHECK: RET_ReallyLR implicit $x1 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load 4 from %ir.addr) + $x1 = COPY %1 + RET_ReallyLR implicit $x1 + +... +--- +name: test_load_acquire_i64 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: test_load_acquire_i64 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[LDAXRX:%[0-9]+]]:gpr64 = LDAXRX [[COPY]] :: (volatile load 8 from %ir.addr) + ; CHECK: $x1 = COPY [[LDAXRX]] + ; CHECK: RET_ReallyLR implicit $x1 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load 8 from %ir.addr) + $x1 = COPY %1 + RET_ReallyLR implicit $x1 Index: llvm/trunk/test/CodeGen/AArch64/arm64-ldxr-stxr.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-ldxr-stxr.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-ldxr-stxr.ll @@ -170,6 +170,7 @@ declare %0 @llvm.aarch64.ldaxp(i8*) nounwind declare i32 @llvm.aarch64.stlxp(i64, i64, i8*) nounwind +; FALLBACK-NOT: remark:{{.*}}test_load_acquire_i8 define void @test_load_acquire_i8(i8* %addr) { ; CHECK-LABEL: test_load_acquire_i8: ; CHECK: ldaxrb w[[LOADVAL:[0-9]+]], [x0] @@ -177,6 +178,11 @@ ; CHECK-NOT: and ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] +; FIXME: GlobalISel doesn't fold ands/adds into load/store addressing modes +; 
right now. So, we won't get the :lo12:var. +; GISEL-LABEL: test_load_acquire_i8: +; GISEL: ldaxrb w[[LOADVAL:[0-9]+]], [x0] +; GISEL-DAG: str x[[LOADVAL]], [{{x[0-9]+}}] %val = call i64 @llvm.aarch64.ldaxr.p0i8(i8* %addr) %shortval = trunc i64 %val to i8 %extval = zext i8 %shortval to i64 @@ -184,6 +190,7 @@ ret void } +; FALLBACK-NOT: remark:{{.*}}test_load_acquire_i16 define void @test_load_acquire_i16(i16* %addr) { ; CHECK-LABEL: test_load_acquire_i16: ; CHECK: ldaxrh w[[LOADVAL:[0-9]+]], [x0] @@ -191,6 +198,9 @@ ; CHECK-NOT: and ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] +; GISEL-LABEL: test_load_acquire_i16: +; GISEL: ldaxrh w[[LOADVAL:[0-9]+]], [x0] +; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}] %val = call i64 @llvm.aarch64.ldaxr.p0i16(i16* %addr) %shortval = trunc i64 %val to i16 %extval = zext i16 %shortval to i64 @@ -198,6 +208,7 @@ ret void } +; FALLBACK-NOT: remark:{{.*}}test_load_acquire_i32 define void @test_load_acquire_i32(i32* %addr) { ; CHECK-LABEL: test_load_acquire_i32: ; CHECK: ldaxr w[[LOADVAL:[0-9]+]], [x0] @@ -205,6 +216,9 @@ ; CHECK-NOT: and ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] +; GISEL-LABEL: test_load_acquire_i32: +; GISEL: ldaxr w[[LOADVAL:[0-9]+]], [x0] +; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}] %val = call i64 @llvm.aarch64.ldaxr.p0i32(i32* %addr) %shortval = trunc i64 %val to i32 %extval = zext i32 %shortval to i64 @@ -212,11 +226,15 @@ ret void } +; FALLBACK-NOT: remark:{{.*}}test_load_acquire_i64 define void @test_load_acquire_i64(i64* %addr) { ; CHECK-LABEL: test_load_acquire_i64: ; CHECK: ldaxr x[[LOADVAL:[0-9]+]], [x0] ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] +; GISEL-LABEL: test_load_acquire_i64: +; GISEL: ldaxr x[[LOADVAL:[0-9]+]], [x0] +; GISEL: str x[[LOADVAL]], [{{x[0-9]+}}] %val = call i64 @llvm.aarch64.ldaxr.p0i64(i64* %addr) store i64 %val, i64* @var, align 8 ret void