Index: llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -101,6 +101,11 @@
     cl::desc("Enable static hinting of branches on ppc"),
     cl::Hidden);
 
+static cl::opt<bool> EnableTLSOpt(
+    "ppc-tls-opt", cl::init(true),
+    cl::desc("Enable tls optimization peephole"),
+    cl::Hidden);
+
 enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
   ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
   ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };
@@ -199,6 +204,14 @@
     bool tryBitPermutation(SDNode *N);
     bool tryIntCompareInGPR(SDNode *N);
 
+    // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
+    // an X-Form load instruction with the offset being a relocation coming from
+    // the PPCISD::ADD_TLS.
+    bool tryTLSXFormLoad(LoadSDNode *N);
+    // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
+    // an X-Form store instruction with the offset being a relocation coming from
+    // the PPCISD::ADD_TLS.
+    bool tryTLSXFormStore(StoreSDNode *N);
     /// SelectCC - Select a comparison of the specified values with the
     /// specified condition code, returning the CR# of the expression.
     SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
@@ -582,6 +595,100 @@
   return false;
 }
 
+/// Select an X-form TLS store for \p ST when its address comes from a
+/// PPCISD::ADD_TLS node. Returns false to fall back to normal selection.
+bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
+  SDValue Base = ST->getBasePtr();
+  if (Base.getOpcode() != PPCISD::ADD_TLS)
+    return false;
+  SDValue Offset = ST->getOffset();
+  if (!Offset.isUndef())
+    return false;
+
+  SDLoc dl(ST);
+  EVT MemVT = ST->getMemoryVT();
+  EVT RegVT = ST->getValue().getValueType();
+
+  unsigned Opcode;
+  switch (MemVT.getSimpleVT().SimpleTy) {
+    default:
+      return false;
+    case MVT::i8: {
+      Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
+      break;
+    }
+    case MVT::i16: {
+      Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
+      break;
+    }
+    case MVT::i32: {
+      Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
+      break;
+    }
+    case MVT::i64: {
+      Opcode = PPC::STDXTLS;
+      break;
+    }
+  }
+  SDValue Chain = ST->getChain();
+  SDVTList VTs = ST->getVTList();
+  // Operand order follows the *XTLS store defs: value, base, TLS symbol.
+  SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
+                   Chain};
+  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
+  transferMemOperands(ST, MN);
+  ReplaceNode(ST, MN);
+  return true;
+}
+
+/// Select an X-form TLS load for \p LD when its address comes from a
+/// PPCISD::ADD_TLS node. Returns false to fall back to normal selection.
+bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
+  SDValue Base = LD->getBasePtr();
+  if (Base.getOpcode() != PPCISD::ADD_TLS)
+    return false;
+  SDValue Offset = LD->getOffset();
+  if (!Offset.isUndef())
+    return false;
+  // Every opcode chosen below (lbzx/lhzx/lwzx/ldx) zero-extends, so a
+  // sign-extending load must be left to normal selection (e.g. lha/lwa).
+  if (LD->getExtensionType() == ISD::SEXTLOAD)
+    return false;
+
+  SDLoc dl(LD);
+  EVT MemVT = LD->getMemoryVT();
+  EVT RegVT = LD->getValueType(0);
+  unsigned Opcode;
+  switch (MemVT.getSimpleVT().SimpleTy) {
+    default:
+      return false;
+    case MVT::i8: {
+      Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
+      break;
+    }
+    case MVT::i16: {
+      Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
+      break;
+    }
+    case MVT::i32: {
+      Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
+      break;
+    }
+    case MVT::i64: {
+      Opcode = PPC::LDXTLS;
+      break;
+    }
+  }
+  SDValue Chain = LD->getChain();
+  SDVTList VTs = LD->getVTList();
+  // Operand order follows the *XTLS load defs: base, TLS symbol.
+  SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
+  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
+  transferMemOperands(LD, MN);
+  ReplaceNode(LD, MN);
+  return true;
+}
+
 /// Turn an or of two masked values into the rotate left word immediate then
 /// mask insert (rlwimi) instruction.
 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
@@ -3949,14 +4056,28 @@
     }
   }
 
+  case ISD::STORE: {
+    // Change TLS initial-exec D-form stores to X-form stores.
+    StoreSDNode *ST = cast<StoreSDNode>(N);
+    if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() &&
+        ST->getAddressingMode() != ISD::PRE_INC)
+      if (tryTLSXFormStore(ST))
+        return;
+    break;
+  }
   case ISD::LOAD: {
     // Handle preincrement loads.
     LoadSDNode *LD = cast<LoadSDNode>(N);
     EVT LoadedVT = LD->getMemoryVT();
 
     // Normal loads are handled by code generated from the .td file.
-    if (LD->getAddressingMode() != ISD::PRE_INC)
+    if (LD->getAddressingMode() != ISD::PRE_INC) {
+      // Change TLS initial-exec D-form loads to X-form loads.
+      if (EnableTLSOpt && PPCSubTarget->isELFv2ABI())
+        if (tryTLSXFormLoad(LD))
+          return;
       break;
+    }
 
     SDValue Offset = LD->getOffset();
     if (Offset.getOpcode() == ISD::TargetConstant ||
Index: llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td
+++ llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -499,7 +499,53 @@
 def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc_nox0:$rA, tlsreg:$rB),
                        "add $rT, $rA, $rB", IIC_IntSimple,
                        [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
-
+// X-form TLS loads: $rA is the base register and $rB the TLS symbol. They
+// carry no patterns and are selected in PPCDAGToDAGISel::tryTLSXFormLoad.
+let mayLoad = 1 in {
+def LBZXTLS : XForm_1<31, 87, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+                      "lbzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+def LHZXTLS : XForm_1<31, 279, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+                      "lhzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+def LWZXTLS : XForm_1<31, 23, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+                      "lwzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+def LDXTLS : XForm_1<31, 21, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+                     "ldx $rD, $rA, $rB", IIC_LdStLD, []>, isPPC64;
+def LBZXTLS_32 : XForm_1<31, 87, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+                         "lbzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+def LHZXTLS_32 : XForm_1<31, 279, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+                         "lhzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+def LWZXTLS_32 : XForm_1<31, 23, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+                         "lwzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+
+}
+
+// X-form TLS stores: $rS is the stored value, $rA the base register and $rB
+// the TLS symbol. Selected in PPCDAGToDAGISel::tryTLSXFormStore.
+let mayStore = 1 in {
+def STBXTLS : XForm_8<31, 215, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+                      "stbx $rS, $rA, $rB", IIC_LdStStore, []>,
+                      PPC970_DGroup_Cracked;
+def STHXTLS : XForm_8<31, 407, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+                      "sthx $rS, $rA, $rB", IIC_LdStStore, []>,
+                      PPC970_DGroup_Cracked;
+def STWXTLS : XForm_8<31, 151, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+                      "stwx $rS, $rA, $rB", IIC_LdStStore, []>,
+                      PPC970_DGroup_Cracked;
+def STDXTLS : XForm_8<31, 149, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+                      "stdx $rS, $rA, $rB", IIC_LdStSTD, []>, isPPC64,
+                      PPC970_DGroup_Cracked;
+def STBXTLS_32 : XForm_8<31, 215, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+                         "stbx $rS, $rA, $rB", IIC_LdStStore, []>,
+                         PPC970_DGroup_Cracked;
+def STHXTLS_32 : XForm_8<31, 407, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+                         "sthx $rS, $rA, $rB", IIC_LdStStore, []>,
+                         PPC970_DGroup_Cracked;
+def STWXTLS_32 : XForm_8<31, 151, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+                         "stwx $rS, $rA, $rB", IIC_LdStStore, []>,
+                         PPC970_DGroup_Cracked;
+
+}
+
 let isCommutable = 1 in
 defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
                         "addc", "$rT, $rA, $rB", IIC_IntGeneral,
Index: llvm/trunk/test/CodeGen/PowerPC/tls-pie-xform.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/tls-pie-xform.ll
+++ llvm/trunk/test/CodeGen/PowerPC/tls-pie-xform.ll
@@ -0,0 +1,173 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s -check-prefix=CHECK
+
+@var_char = external thread_local local_unnamed_addr global i8, align 1
+@var_short = external thread_local local_unnamed_addr global i16, align 2
+@var_int = external thread_local local_unnamed_addr global i32, align 4
+@var_long_long = external thread_local local_unnamed_addr global i64, align 8
+
+define dso_local zeroext i8 @test_char_one() {
+; CHECK-LABEL: test_char_one:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 3, 2, var_char@got@tprel@ha
+; CHECK-NEXT: ld 3, var_char@got@tprel@l(3)
+; CHECK-NEXT: lbzx 3, 3, var_char@tls
+entry:
+  %0 = load i8, i8* @var_char, align 1, !tbaa !4
+  ret i8 %0
+}
+
+define dso_local void @test_char_two(i32 signext %a) {
+; CHECK-LABEL: test_char_two:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 4, 2, var_char@got@tprel@ha
+; CHECK-NEXT: ld 4, var_char@got@tprel@l(4)
+; CHECK-NEXT: stbx 3, 4, var_char@tls
+entry:
+  %conv = trunc i32 %a to i8
+  store i8 %conv, i8* @var_char, align 1, !tbaa !4
+  ret void
+}
+
+define dso_local zeroext i8 @test_char_three(i8 zeroext %a) {
+; CHECK-LABEL: test_char_three:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 4, 2, var_char@got@tprel@ha
+; CHECK-NEXT: ld 4, var_char@got@tprel@l(4)
+; CHECK-NEXT: lbzx 5, 4, var_char@tls
+; CHECK: stbx {{[0-9]+}}, 4, var_char@tls
+entry:
+  %0 = load i8, i8* @var_char, align 1, !tbaa !4
+  %add = add i8 %0, %a
+  store i8 %add, i8* @var_char, align 1, !tbaa !4
+  ret i8 %add
+}
+
+define dso_local signext i16 @test_short_one() {
+; CHECK-LABEL: test_short_one:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 3, 2, var_short@got@tprel@ha
+; CHECK-NEXT: ld 3, var_short@got@tprel@l(3)
+; Sign-extending load: must not become the zero-extending lhzx.
+; CHECK-NEXT: add [[ADDR:[0-9]+]], 3, var_short@tls
+; CHECK-NEXT: lha 3, 0([[ADDR]])
+entry:
+  %0 = load i16, i16* @var_short, align 2, !tbaa !7
+  ret i16 %0
+}
+
+define dso_local void @test_short_two(i32 signext %a) {
+; CHECK-LABEL: test_short_two:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 4, 2, var_short@got@tprel@ha
+; CHECK-NEXT: ld 4, var_short@got@tprel@l(4)
+; CHECK-NEXT: sthx 3, 4, var_short@tls
+entry:
+  %conv = trunc i32 %a to i16
+  store i16 %conv, i16* @var_short, align 2, !tbaa !7
+  ret void
+}
+
+define dso_local signext i16 @test_short_three(i16 signext %a) {
+; CHECK-LABEL: test_short_three:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 4, 2, var_short@got@tprel@ha
+; CHECK-NEXT: ld 4, var_short@got@tprel@l(4)
+; CHECK-NEXT: lhzx 5, 4, var_short@tls
+; CHECK: sthx {{[0-9]+}}, 4, var_short@tls
+entry:
+  %0 = load i16, i16* @var_short, align 2, !tbaa !7
+  %add = add i16 %0, %a
+  store i16 %add, i16* @var_short, align 2, !tbaa !7
+  ret i16 %add
+}
+
+define dso_local signext i32 @test_int_one() {
+; CHECK-LABEL: test_int_one:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 3, 2, var_int@got@tprel@ha
+; CHECK-NEXT: ld 3, var_int@got@tprel@l(3)
+; Sign-extending load: must not become the zero-extending lwzx.
+; CHECK-NEXT: add [[ADDR:[0-9]+]], 3, var_int@tls
+; CHECK-NEXT: lwa 3, 0([[ADDR]])
+entry:
+  %0 = load i32, i32* @var_int, align 4, !tbaa !9
+  ret i32 %0
+}
+
+define dso_local void @test_int_two(i32 signext %a) {
+; CHECK-LABEL: test_int_two:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 4, 2, var_int@got@tprel@ha
+; CHECK-NEXT: ld 4, var_int@got@tprel@l(4)
+; CHECK-NEXT: stwx 3, 4, var_int@tls
+entry:
+  store i32 %a, i32* @var_int, align 4, !tbaa !9
+  ret void
+}
+
+define dso_local signext i32 @test_int_three(i32 signext %a) {
+; CHECK-LABEL: test_int_three:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 4, 2, var_int@got@tprel@ha
+; CHECK-NEXT: ld 4, var_int@got@tprel@l(4)
+; CHECK-NEXT: lwzx 5, 4, var_int@tls
+; CHECK: stwx {{[0-9]+}}, 4, var_int@tls
+entry:
+  %0 = load i32, i32* @var_int, align 4, !tbaa !9
+  %add = add nsw i32 %0, %a
+  store i32 %add, i32* @var_int, align 4, !tbaa !9
+  ret i32 %add
+}
+
+define dso_local i64 @test_longlong_one() {
+; CHECK-LABEL: test_longlong_one:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 3, 2, var_long_long@got@tprel@ha
+; CHECK-NEXT: ld 3, var_long_long@got@tprel@l(3)
+; CHECK-NEXT: ldx 3, 3, var_long_long@tls
+entry:
+  %0 = load i64, i64* @var_long_long, align 8, !tbaa !11
+  ret i64 %0
+}
+
+define dso_local void @test_longlong_two(i32 signext %a) {
+; CHECK-LABEL: test_longlong_two:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 4, 2, var_long_long@got@tprel@ha
+; CHECK-NEXT: ld 4, var_long_long@got@tprel@l(4)
+; CHECK-NEXT: stdx 3, 4, var_long_long@tls
+entry:
+  %conv = sext i32 %a to i64
+  store i64 %conv, i64* @var_long_long, align 8, !tbaa !11
+  ret void
+}
+
+define dso_local i64 @test_longlong_three(i64 %a) {
+; CHECK-LABEL: test_longlong_three:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 4, 2, var_long_long@got@tprel@ha
+; CHECK-NEXT: ld 4, var_long_long@got@tprel@l(4)
+; CHECK-NEXT: ldx 5, 4, var_long_long@tls
+; CHECK: stdx {{[0-9]+}}, 4, var_long_long@tls
+entry:
+  %0 = load i64, i64* @var_long_long, align 8, !tbaa !11
+  %add = add nsw i64 %0, %a
+  store i64 %add, i64* @var_long_long, align 8, !tbaa !11
+  ret i64 %add
+}
+
+!llvm.module.flags = !{!0, !1, !2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"PIC Level", i32 1}
+!2 = !{i32 7, !"PIE Level", i32 1}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"short", !5, i64 0}
+!9 = !{!10, !10, i64 0}
+!10 = !{!"int", !5, i64 0}
+!11 = !{!12, !12, i64 0}
+!12 = !{!"long long", !5, i64 0}