Index: lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
===================================================================
--- lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -889,6 +889,14 @@
     MI.eraseFromParent();
     return true;
   }
+  case AArch64::TP: {
+    // Expand the thread-pointer pseudo into the real MRS read of TPIDR_EL0.
+    unsigned DstReg = MI.getOperand(0).getReg();
+    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
+        .addImm(AArch64SysReg::TPIDR_EL0);
+    MI.eraseFromParent();
+    return true;
+  }
 
   case AArch64::MOVi32imm:
     return expandMOVImm(MBB, MBBI, 32);
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -426,8 +426,12 @@
 def MSRpstateImm4 : MSRpstateImm0_15;
 
 // The thread pointer (on Linux, at least, where this has been implemented) is
-// TPIDR_EL0.
-def : Pat<(AArch64threadpointer), (MRS 0xde82)>;
+// TPIDR_EL0. Read it through a pseudo (expanded to MRS in
+// AArch64ExpandPseudoInsts) so the read can be marked as having no side
+// effects, which lets it be hoisted out of loops.
+let hasSideEffects = 0 in
+def TP : Pseudo<(outs GPR64:$dst), (ins),
+                [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;
 
 // The cycle counter PMC register is PMCCNTR_EL0.
 let Predicates = [HasPerfMon] in
Index: test/CodeGen/AArch64/licm-thread-pointer.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/licm-thread-pointer.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+
+; The thread pointer read (mrs TPIDR_EL0) and the TLS address computation must
+; be emitted once, ahead of the loop, rather than on every iteration.
+
+@x = thread_local local_unnamed_addr global i32 0, align 4
+
+; CHECK-LABEL: @test1
+; CHECK: BB#1:
+; CHECK: mrs x[[BASE:[0-9]+]], TPIDR_EL0
+; CHECK: add x[[REG1:[0-9]+]], x[[BASE]], :tprel_hi12:x
+; CHECK: add x[[REG2:[0-9]+]], x[[REG1]], :tprel_lo12_nc:x
+;
+; CHECK: .LBB0_2:
+; CHECK: ldr w0, [x[[REG2]]]
+; CHECK: bl bar
+; CHECK: sub w[[REG3:[0-9]+]], w{{[0-9]+}}, #1
+; CHECK: cbnz w[[REG3]], .LBB0_2
+
+define void @test1(i32 %N) local_unnamed_addr {
+entry:
+  %cmp3 = icmp sgt i32 %N, 0
+  br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i.04 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %0 = load i32, i32* @x, align 4
+  tail call void @bar(i32 %0)
+  %inc = add nuw nsw i32 %i.04, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+declare void @bar(i32) local_unnamed_addr