Index: llvm/trunk/lib/Target/PowerPC/PPC.h =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPC.h +++ llvm/trunk/lib/Target/PowerPC/PPC.h @@ -52,6 +52,7 @@ void initializePPCVSXFMAMutatePass(PassRegistry&); void initializePPCBoolRetToIntPass(PassRegistry&); void initializePPCExpandISELPass(PassRegistry &); + void initializePPCTLSDynamicCallPass(PassRegistry &); extern char &PPCVSXFMAMutateID; namespace PPCII { Index: llvm/trunk/lib/Target/PowerPC/PPCTLSDynamicCall.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -52,6 +52,7 @@ protected: bool processBlock(MachineBasicBlock &MBB) { bool Changed = false; + bool NeedFence = true; bool Is64Bit = MBB.getParent()->getSubtarget().isPPC64(); for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); @@ -62,6 +63,16 @@ MI.getOpcode() != PPC::ADDItlsldLADDR && MI.getOpcode() != PPC::ADDItlsgdLADDR32 && MI.getOpcode() != PPC::ADDItlsldLADDR32) { + + // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP + // as scheduling fences, we skip creating fences if we already + // have existing ADJCALLSTACKDOWN/UP to avoid nesting, + // which causes verification error with -verify-machineinstrs. + if (MI.getOpcode() == PPC::ADJCALLSTACKDOWN) + NeedFence = false; + else if (MI.getOpcode() == PPC::ADJCALLSTACKUP) + NeedFence = true; + ++I; continue; } @@ -96,11 +107,15 @@ break; } - // Don't really need to save data to the stack - the clobbered + // We create ADJCALLSTACKUP and ADJCALLSTACKDOWN around _tls_get_addr + // as schduling fence to avoid it is scheduled before + // mflr in the prologue and the address in LR is clobbered (PR25839). + // We don't really need to save data to the stack - the clobbered // registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr) // gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR). - BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0) - .addImm(0); + if (NeedFence) + BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0) + .addImm(0); // Expand into two ops built prior to the existing instruction. MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3) @@ -116,7 +131,8 @@ .addReg(GPR3)); Call->addOperand(MI.getOperand(3)); - BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0); + if (NeedFence) + BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0); BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg) .addReg(GPR3); Index: llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -93,6 +93,7 @@ PassRegistry &PR = *PassRegistry::getPassRegistry(); initializePPCBoolRetToIntPass(PR); initializePPCExpandISELPass(PR); + initializePPCTLSDynamicCallPass(PR); } /// Return the datalayout string of a subtarget. Index: llvm/trunk/test/CodeGen/PowerPC/tls_get_addr_fence1.mir =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/tls_get_addr_fence1.mir +++ llvm/trunk/test/CodeGen/PowerPC/tls_get_addr_fence1.mir @@ -0,0 +1,66 @@ +# ADJCALLSTACKDOWN and ADJCALLSTACKUP must be generated around TLS pseudo code as scheduling fence (PR25839). +# RUN: llc -mtriple=powerpc64le-linux-gnu -run-pass=ppc-tls-dynamic-call -verify-machineinstrs -o - %s | FileCheck %s + +--- | + target datalayout = "e-m:e-i64:64-n32:64" + target triple = "powerpc64le-unknown-linux-gnu" + + @tls_var = external thread_local local_unnamed_addr global i32 + + define i32 @tls_func() local_unnamed_addr { + entry: + %0 = load i32, i32* @tls_var + ret i32 %0 + } + +... +--- +name: tls_func +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } + - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' } + - { id: 2, class: g8rc, preferred-register: '' } +liveins: + - { reg: '%x2' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + savePoint: '' + restorePoint: '' +fixedStack: +stack: +constants: +body: | + bb.0.entry: + liveins: %x2 + %0 = ADDIStlsgdHA %x2, @tls_var + %1 = ADDItlsgdLADDR killed %0, @tls_var, @tls_var, implicit-def dead %x0, implicit-def dead %x3, implicit-def dead %x4, implicit-def dead %x5, implicit-def dead %x6, implicit-def dead %x7, implicit-def dead %x8, implicit-def dead %x9, implicit-def dead %x10, implicit-def dead %x11, implicit-def dead %x12, implicit-def dead %lr8, implicit-def dead %ctr8, implicit-def dead %cr0, implicit-def dead %cr1, implicit-def dead %cr5, implicit-def dead %cr6, implicit-def dead %cr7 + %2 = LWZ8 0, killed %1 :: (dereferenceable load 4 from @tls_var) + %x3 = COPY %2 + BLR8 implicit %lr8, implicit %rm, implicit %x3 + ; CHECK-LABEL: bb.0.entry + ; CHECK: %[[reg1:[0-9]+]] = ADDIStlsgdHA %x2, @tls_var + ; CHECK: ADJCALLSTACKDOWN 0, 0 + ; CHECK: %x3 = ADDItlsgdL %[[reg1]], @tls_var + ; CHECK: %x3 = GETtlsADDR %x3, @tls_var + ; CHECK: ADJCALLSTACKUP 0, 0 + ; CHECK: BLR8 +... Index: llvm/trunk/test/CodeGen/PowerPC/tls_get_addr_fence2.mir =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/tls_get_addr_fence2.mir +++ llvm/trunk/test/CodeGen/PowerPC/tls_get_addr_fence2.mir @@ -0,0 +1,65 @@ +# ADJCALLSTACKDOWN and ADJCALLSTACKUP should not be generated around TLS pseudo code if it is located within existing ADJCALLSTACKDOWN/ADJCALLSTACKUP pair. +# RUN: llc -mtriple=powerpc64le-linux-gnu -run-pass=ppc-tls-dynamic-call -verify-machineinstrs -o - %s | FileCheck %s + +--- | + target datalayout = "e-m:e-i64:64-n32:64" + target triple = "powerpc64le-unknown-linux-gnu" + + @tls_var = external thread_local local_unnamed_addr global i32 + + define i32 @tls_func() local_unnamed_addr { + entry: + %0 = load i32, i32* @tls_var + ret i32 %0 + } + +... +--- +name: tls_func +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } + - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' } + - { id: 2, class: g8rc, preferred-register: '' } +liveins: + - { reg: '%x2' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + savePoint: '' + restorePoint: '' +fixedStack: +stack: +constants: +body: | + bb.0.entry: + liveins: %x2 + ADJCALLSTACKDOWN 32, 0, implicit-def %r1, implicit %r1 + %0 = ADDIStlsgdHA %x2, @tls_var + %1 = ADDItlsgdLADDR killed %0, @tls_var, @tls_var, implicit-def dead %x0, implicit-def dead %x3, implicit-def dead %x4, implicit-def dead %x5, implicit-def dead %x6, implicit-def dead %x7, implicit-def dead %x8, implicit-def dead %x9, implicit-def dead %x10, implicit-def dead %x11, implicit-def dead %x12, implicit-def dead %lr8, implicit-def dead %ctr8, implicit-def dead %cr0, implicit-def dead %cr1, implicit-def dead %cr5, implicit-def dead %cr6, implicit-def dead %cr7 + %2 = LWZ8 0, killed %1 :: (dereferenceable load 4 from @tls_var) + %x3 = COPY %2 + ADJCALLSTACKUP 32, 0, implicit-def %r1, implicit %r1 + BLR8 implicit %lr8, implicit %rm, implicit %x3 + ; CHECK-LABEL: bb.0.entry + ; CHECK-NOT: ADJCALLSTACKDOWN 0, 0 + ; CHECK-NOT: ADJCALLSTACKUP 0, 0 + ; CHECK: BLR8 +...