Index: lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
===================================================================
--- lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -889,6 +889,13 @@
     MI.eraseFromParent();
     return true;
   }
+  case AArch64::MOVbaseTLS: {
+    unsigned DstReg = MI.getOperand(0).getReg();
+    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
+        .addImm(0xde82); // System register encoding of TPIDR_EL0.
+    MI.eraseFromParent();
+    return true;
+  }
 
   case AArch64::MOVi32imm:
     return expandMOVImm(MBB, MBBI, 32);
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -426,8 +426,10 @@
 def MSRpstateImm4 : MSRpstateImm0_15;
 
 // The thread pointer (on Linux, at least, where this has been implemented) is
-// TPIDR_EL0.
-def : Pat<(AArch64threadpointer), (MRS 0xde82)>;
+// TPIDR_EL0. Use a pseudo op so the read can be marked side-effect free.
+let hasSideEffects = 0 in
+def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
+                        [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[]>;
 
 // The cycle counter PMC register is PMCCNTR_EL0.
 let Predicates = [HasPerfMon] in
Index: test/CodeGen/AArch64/thread-pointer.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/thread-pointer.ll
@@ -0,0 +1,60 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+
+@x = thread_local local_unnamed_addr global i32 0, align 4
+@y = thread_local local_unnamed_addr global i32 0, align 4
+
+; Machine LICM should hoist the mrs into the loop preheader.
+; CHECK-LABEL: @test1
+; CHECK: BB#1:
+; CHECK: mrs x[[BASE:[0-9]+]], TPIDR_EL0
+; CHECK: add x[[REG1:[0-9]+]], x[[BASE]], :tprel_hi12:x
+; CHECK: add x[[REG2:[0-9]+]], x[[REG1]], :tprel_lo12_nc:x
+;
+; CHECK: .LBB0_2:
+; CHECK: ldr w0, [x[[REG2]]]
+; CHECK: bl bar
+; CHECK: sub w[[REG3:[0-9]+]], w{{[0-9]+}}, #1
+; CHECK: cbnz w[[REG3]], .LBB0_2
+
+define void @test1(i32 %n) local_unnamed_addr {
+entry:
+  %cmp3 = icmp sgt i32 %n, 0
+  br i1 %cmp3, label %bb1, label %bb2
+
+bb1:
+  br label %for.body
+
+for.body:
+  %i.04 = phi i32 [ %inc, %for.body ], [ 0, %bb1 ]
+  %0 = load i32, i32* @x, align 4
+  tail call void @bar(i32 %0) #2
+  %inc = add nuw nsw i32 %i.04, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %bb2, label %for.body
+
+bb2:
+  ret void
+}
+
+; Machine CSE should combine the mrs for the loads of @x and @y.
+; CHECK-LABEL: @test2
+; CHECK: mrs x{{[0-9]+}}, TPIDR_EL0
+; CHECK-NOT: mrs x{{[0-9]+}}, TPIDR_EL0
+; CHECK: ret
+define void @test2(i32 %c) local_unnamed_addr #0 {
+entry:
+  %0 = load i32, i32* @x, align 4
+  tail call void @bar(i32 %0) #2
+  %cmp = icmp eq i32 %c, 0
+  br i1 %cmp, label %if.end, label %if.then
+
+if.then:
+  %1 = load i32, i32* @y, align 4
+  tail call void @bar(i32 %1) #2
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+declare void @bar(i32) local_unnamed_addr