Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2614,8 +2614,26 @@
     // Need special instructions for atomics that affect ordering.
     if (Order != AtomicOrdering::NotAtomic &&
         Order != AtomicOrdering::Unordered &&
-        Order != AtomicOrdering::Monotonic)
-      return false;
+        Order != AtomicOrdering::Monotonic) {
+      assert(I.getOpcode() != TargetOpcode::G_ZEXTLOAD);
+      // The opcode tables below are indexed by Log2 of the byte size and only
+      // cover 1, 2, 4 and 8 byte accesses; reject anything wider rather than
+      // index past the end of a table.
+      if (MemSizeInBytes > 8)
+        return false;
+
+      if (I.getOpcode() == TargetOpcode::G_LOAD) {
+        static const unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
+                                           AArch64::LDARW, AArch64::LDARX};
+        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
+      } else {
+        static const unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
+                                           AArch64::STLRW, AArch64::STLRX};
+        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
+      }
+      constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+      return true;
+    }
 
 #ifndef NDEBUG
     const Register PtrReg = I.getOperand(1).getReg();
Index: llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -251,6 +251,7 @@
       .widenScalarToNextPow2(0);
 
   getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
+      .lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
       .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                  {s32, p0, 16, 8},
                                  {s32, p0, 32, 8},
Index: llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -1089,4 +1089,147 @@
   ret void
 }
 
+; An acquire i8 load followed by zext must select LDARB (extension folded into
+; the ADD as uxtb); the unordered i16 load keeps using a plain LDRH.
+define i32 @load_zext(i8* %p8, i16* %p16) {
+; CHECK-NOLSE-O1-LABEL: load_zext:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    ldarb w8, [x0]
+; CHECK-NOLSE-O1-NEXT:    ldrh w9, [x1]
+; CHECK-NOLSE-O1-NEXT:    add w0, w9, w8, uxtb
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: load_zext:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    ldarb w9, [x0]
+; CHECK-NOLSE-O0-NEXT:    ldrh w8, [x1]
+; CHECK-NOLSE-O0-NEXT:    add w0, w8, w9, uxtb
+; CHECK-NOLSE-O0-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: load_zext:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    ldarb w8, [x0]
+; CHECK-LSE-O1-NEXT:    ldrh w9, [x1]
+; CHECK-LSE-O1-NEXT:    add w0, w9, w8, uxtb
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: load_zext:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    ldarb w9, [x0]
+; CHECK-LSE-O0-NEXT:    ldrh w8, [x1]
+; CHECK-LSE-O0-NEXT:    add w0, w8, w9, uxtb
+; CHECK-LSE-O0-NEXT:    ret
+  %val1.8 = load atomic i8, i8* %p8 acquire, align 1
+  %val1 = zext i8 %val1.8 to i32
+
+  %val2.16 = load atomic i16, i16* %p16 unordered, align 2
+  %val2 = zext i16 %val2.16 to i32
+
+  %res = add i32 %val1, %val2
+  ret i32 %res
+}
+
+; seq_cst and acquire loads of i32/i64 must both select LDAR with the matching
+; register width.
+define { i32, i64 } @load_acq(i32* %p32, i64* %p64) {
+; CHECK-NOLSE-LABEL: load_acq:
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    ldar w0, [x0]
+; CHECK-NOLSE-NEXT:    ldar x1, [x1]
+; CHECK-NOLSE-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: load_acq:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    ldar w0, [x0]
+; CHECK-LSE-O1-NEXT:    ldar x1, [x1]
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: load_acq:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    ldar w0, [x0]
+; CHECK-LSE-O0-NEXT:    ldar x1, [x1]
+; CHECK-LSE-O0-NEXT:    ret
+  %val32 = load atomic i32, i32* %p32 seq_cst, align 4
+  %tmp = insertvalue { i32, i64 } undef, i32 %val32, 0
+
+  %val64 = load atomic i64, i64* %p64 acquire, align 8
+  %res = insertvalue { i32, i64 } %tmp, i64 %val64, 1
+
+  ret { i32, i64 } %res
+}
+
+; Same as load_zext, but with sign extension: LDARB + sxtb on the ADD for the
+; acquire load, LDRH + SXTH for the unordered one.
+define i32 @load_sext(i8* %p8, i16* %p16) {
+; CHECK-NOLSE-O1-LABEL: load_sext:
+; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    ldarb w8, [x0]
+; CHECK-NOLSE-O1-NEXT:    ldrh w9, [x1]
+; CHECK-NOLSE-O1-NEXT:    sxth w9, w9
+; CHECK-NOLSE-O1-NEXT:    add w0, w9, w8, sxtb
+; CHECK-NOLSE-O1-NEXT:    ret
+;
+; CHECK-NOLSE-O0-LABEL: load_sext:
+; CHECK-NOLSE-O0:       ; %bb.0:
+; CHECK-NOLSE-O0-NEXT:    ldarb w9, [x0]
+; CHECK-NOLSE-O0-NEXT:    ldrh w8, [x1]
+; CHECK-NOLSE-O0-NEXT:    sxth w8, w8
+; CHECK-NOLSE-O0-NEXT:    add w0, w8, w9, sxtb
+; CHECK-NOLSE-O0-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: load_sext:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    ldarb w8, [x0]
+; CHECK-LSE-O1-NEXT:    ldrh w9, [x1]
+; CHECK-LSE-O1-NEXT:    sxth w9, w9
+; CHECK-LSE-O1-NEXT:    add w0, w9, w8, sxtb
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: load_sext:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    ldarb w9, [x0]
+; CHECK-LSE-O0-NEXT:    ldrh w8, [x1]
+; CHECK-LSE-O0-NEXT:    sxth w8, w8
+; CHECK-LSE-O0-NEXT:    add w0, w8, w9, sxtb
+; CHECK-LSE-O0-NEXT:    ret
+  %val1.8 = load atomic i8, i8* %p8 acquire, align 1
+  %val1 = sext i8 %val1.8 to i32
+
+  %val2.16 = load atomic i16, i16* %p16 unordered, align 2
+  %val2 = sext i16 %val2.16 to i32
+
+  %res = add i32 %val1, %val2
+  ret i32 %res
+}
+
+; A seq_cst i8 store of a truncated value must select STLRB; the monotonic i16
+; store keeps using a plain STRH.
+define void @store_trunc(i32 %val, i8* %p8, i16* %p16) {
+; CHECK-NOLSE-LABEL: store_trunc:
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    stlrb w0, [x1]
+; CHECK-NOLSE-NEXT:    strh w0, [x2]
+; CHECK-NOLSE-NEXT:    ret
+;
+; CHECK-LSE-O1-LABEL: store_trunc:
+; CHECK-LSE-O1:       ; %bb.0:
+; CHECK-LSE-O1-NEXT:    stlrb w0, [x1]
+; CHECK-LSE-O1-NEXT:    strh w0, [x2]
+; CHECK-LSE-O1-NEXT:    ret
+;
+; CHECK-LSE-O0-LABEL: store_trunc:
+; CHECK-LSE-O0:       ; %bb.0:
+; CHECK-LSE-O0-NEXT:    stlrb w0, [x1]
+; CHECK-LSE-O0-NEXT:    strh w0, [x2]
+; CHECK-LSE-O0-NEXT:    ret
+  %val8 = trunc i32 %val to i8
+  store atomic i8 %val8, i8* %p8 seq_cst, align 1
+
+  %val16 = trunc i32 %val to i16
+  store atomic i16 %val16, i16* %p16 monotonic, align 2
+
+  ret void
+}
+
+
 attributes #0 = { nounwind }