Index: llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h +++ llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h @@ -581,6 +581,10 @@ /// function. Writing to a constant register has no effect. bool isConstantPhysReg(unsigned PhysReg) const; + /// Returns true if either isConstantPhysReg or TRI->isCallerPreservedPhysReg + /// returns true. This is a utility member function. + bool isCallerPreservedOrConstPhysReg(unsigned PhysReg) const; + /// Get an iterator over the pressure sets affected by the given physical or /// virtual register. If RegUnit is physical, it must be a register unit (from /// MCRegUnitIterator). Index: llvm/trunk/lib/CodeGen/MachineCSE.cpp =================================================================== --- llvm/trunk/lib/CodeGen/MachineCSE.cpp +++ llvm/trunk/lib/CodeGen/MachineCSE.cpp @@ -250,8 +250,8 @@ continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) continue; - // Reading constant physregs is ok. - if (!MRI->isConstantPhysReg(Reg)) + // Reading either caller preserved or constant physregs is ok. 
+ if (!MRI->isCallerPreservedOrConstPhysReg(Reg)) for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) PhysRefs.insert(*AI); } Index: llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp +++ llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp @@ -487,6 +487,13 @@ return true; } +bool +MachineRegisterInfo::isCallerPreservedOrConstPhysReg(unsigned PhysReg) const { + const TargetRegisterInfo *TRI = getTargetRegisterInfo(); + return isConstantPhysReg(PhysReg) || + TRI->isCallerPreservedPhysReg(PhysReg, *MF); +} + /// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the /// specified register as undefined which causes the DBG_VALUE to be /// deleted during LiveDebugVariables analysis. Index: llvm/trunk/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll +++ llvm/trunk/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll @@ -0,0 +1,63 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; The instructions addis, addi, bl are used to calculate the address of TLS +; thread local variables. These TLS access code sequences are generated +; repeatedly every time the thread local variable is accessed. By communicating +; to Machine CSE that X2 is guaranteed to have the same value within the same +; function call (so called Caller Preserved Physical Register), the redundant +; TLS access code sequences are cleaned up. 
+ +%"struct.CC::TT" = type { i64, i32 } +%class.CC = type { %struct.SS } +%struct.SS = type { void ()* } + +@_ZN2CC2ccE = external thread_local global %"struct.CC::TT", align 8 + +define noalias i8* @_ZN2CC3funEv(%class.CC* %this) { +; CHECK-LABEL: _ZN2CC3funEv: +; CHECK: mflr 0 +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -48(1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std 30, 32(1) +; CHECK-NEXT: mr 30, 3 +; CHECK-NEXT: ld 12, 0(30) +; CHECK-NEXT: std 2, 24(1) +; CHECK-NEXT: mtctr 12 +; CHECK-NEXT: bctrl +; CHECK-NEXT: ld 2, 24(1) +; CHECK-NEXT: addis 3, 2, _ZN2CC2ccE@got@tlsgd@ha +; CHECK-NEXT: addi 3, 3, _ZN2CC2ccE@got@tlsgd@l +; CHECK-NEXT: bl __tls_get_addr(_ZN2CC2ccE@tlsgd) +; CHECK-NEXT: nop +; CHECK-NEXT: ld 4, 0(3) +; CHECK-NEXT: cmpldi 4, 0 +; CHECK-NEXT: beq 0, .LBB0_2 +; CHECK: addi 4, 3, 8 +; CHECK-NEXT: mr 3, 30 +; CHECK-NEXT: bl _ZN2CC3barEPi +; CHECK-NEXT: nop +; CHECK: ld 30, 32(1) +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: addi 1, 1, 48 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +entry: + %foo = getelementptr inbounds %class.CC, %class.CC* %this, i64 0, i32 0, i32 0 + %0 = load void ()*, void ()** %foo, align 8 + tail call void %0() + %1 = load i64, i64* getelementptr inbounds (%"struct.CC::TT", %"struct.CC::TT"* @_ZN2CC2ccE, i64 0, i32 0) + %tobool = icmp eq i64 %1, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: + tail call void @_ZN2CC3barEPi(%class.CC* nonnull %this, i32* getelementptr inbounds (%"struct.CC::TT", %"struct.CC::TT"* @_ZN2CC2ccE, i64 0, i32 1)) + br label %if.end + +if.end: + ret i8* null +} + +declare void @_ZN2CC3barEPi(%class.CC*, i32*) Index: llvm/trunk/test/CodeGen/PowerPC/licm-tocReg.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/licm-tocReg.ll +++ llvm/trunk/test/CodeGen/PowerPC/licm-tocReg.ll @@ -1,20 +1,20 @@ ; RUN: llc -verify-machineinstrs 
-mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s -; The instructions ADDIStocHA/LDtocL are used to calculate the address of -; globals. The ones that are in bb.3.if.end could not be hoisted by Machine +; The instructions ADDIStocHA/LDtocL are used to calculate the address of +; globals. The ones that are in bb.3.if.end could not be hoisted by Machine ; LICM due to BCTRL_LDinto_toc in bb2.if.then. This call causes the compiler ; to insert a save TOC to stack before the call and load into X2 to restore TOC -; after. By communicating to Machine LICM that X2 is guaranteed to have the +; after. By communicating to Machine LICM that X2 is guaranteed to have the ; same value before and after BCTRL_LDinto_toc, these instructions can be ; hoisted out of bb.3.if.end to outside of the loop. ; Pre Machine LICM MIR ; -;body: +;body: ; bb.0.entry: ; successors: %bb.2.if.then(0x40000000), %bb.3.if.end(0x40000000) ; liveins: %x3 -; +; ; %4 = COPY %x3 ; %5 = ADDIStocHA %x2, @ga ; %6 = LDtocL @ga, killed %5 :: (load 8 from got) @@ -26,7 +26,7 @@ ; %11 = CMPW killed %7, killed %10 ; BCC 44, killed %11, %bb.2.if.then ; B %bb.3.if.end -; +; ; bb.2.if.then: ; %1 = PHI %0, %bb.0.entry, %3, %bb.3.if.end ; ADJCALLSTACKDOWN 32, 0, implicit-def dead %r1, implicit %r1 @@ -41,10 +41,10 @@ ; %22 = COPY %x3 ; %x3 = COPY %22 ; BLR8 implicit %lr8, implicit %rm, implicit %x3 -; +; ; bb.3.if.end: ; successors: %bb.2.if.then(0x04000000), %bb.3.if.end(0x7c000000) -; +; ; %2 = PHI %0, %bb.0.entry, %3, %bb.3.if.end ; %12 = ADDI %2, 1 ; %13 = ADDIStocHA %x2, @ga @@ -62,27 +62,23 @@ @ga = external global i32, align 4 @gb = external global i32, align 4 -; Function Attrs: nounwind define signext i32 @test(i32 (i32)* nocapture %FP) local_unnamed_addr #0 { ; CHECK-LABEL: test: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: addis 4, 2, .LC0@toc@ha -; CHECK-NEXT: addis 5, 2, .LC1@toc@ha -; CHECK-NEXT: ld 4, .LC0@toc@l(4) -; CHECK-NEXT: ld 5, .LC1@toc@l(5) -; CHECK-NEXT: lwz 6, 0(4) -; CHECK-NEXT: lwz 5, 
0(5) -; CHECK-NEXT: cmpw 6, 5 -; CHECK-NEXT: lwz 5, 0(4) +; CHECK-NEXT: addis 6, 2, .LC0@toc@ha +; CHECK-NEXT: addis 4, 2, .LC1@toc@ha +; CHECK-NEXT: ld 5, .LC1@toc@l(4) +; CHECK-NEXT: ld 6, .LC0@toc@l(6) +; CHECK-NEXT: lwz 4, 0(5) +; CHECK-NEXT: lwz 7, 0(6) +; CHECK-NEXT: cmpw 4, 7 +; CHECK-NEXT: lwz 7, 0(5) ; CHECK-NEXT: mr 4, 3 -; CHECK-NEXT: bgt 0, .LBB0_3 -; CHECK-NEXT: # BB#1: -; CHECK-NEXT: addis 3, 2, .LC0@toc@ha -; CHECK-NEXT: addis 6, 2, .LC1@toc@ha -; CHECK-NEXT: ld 3, .LC0@toc@l(3) -; CHECK-NEXT: ld 6, .LC1@toc@l(6) +; CHECK-NEXT: bgt 0, .LBB0_2 +; CHECK-NOT: addis {{[0-9]+}}, 2, .LC0@toc@ha +; CHECK-NOT: addis {{[0-9]+}}, 2, .LC1@toc@ha ; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB0_2: # %if.end +; CHECK-NEXT: .LBB0_1: # %if.end ; CHECK-NOT: addis {{[0-9]+}}, 2, .LC0@toc@ha ; CHECK-NOT: addis {{[0-9]+}}, 2, .LC1@toc@ha ; CHECK: blr