diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -262,6 +262,14 @@
                                               None) == PPC::AM_DForm;
   }
 
+  /// SelectPCRelForm - Returns true if address N can be represented by
+  /// PC-Relative addressing mode.
+  bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
+                       SDValue &Base) {
+    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
+                                              None) == PPC::AM_PCRel;
+  }
+
   /// SelectXForm - Returns true if address N can be represented by the
   /// addressing mode of XForm instructions (an indexed [r+r] operation).
   bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -494,6 +494,11 @@
     /// Constrained floating point add in round-to-zero mode.
     STRICT_FADDRTZ,
 
+    // NOTE: The nodes below may require PC-Rel specific patterns if the
+    // address could be PC-Relative. When adding new nodes below, consider
+    // whether or not the address can be PC-Relative and add the corresponding
+    // PC-relative patterns and tests.
+
     /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
     /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
     /// the GPRC input, then stores it through Ptr. Type can be either i16 or
@@ -713,6 +718,7 @@
     AM_DSForm,
     AM_DQForm,
     AM_XForm,
+    AM_PCRel
   };
 
 } // end namespace PPC
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17357,6 +17357,14 @@
   }
 }
 
+static bool isPCRelNode(SDValue N) {
+  return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
+          isValidPCRelNode<ConstantPoolSDNode>(N) ||
+          isValidPCRelNode<GlobalAddressSDNode>(N) ||
+          isValidPCRelNode<JumpTableSDNode>(N) ||
+          isValidPCRelNode<BlockAddressSDNode>(N));
+}
+
 /// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
 /// the address flags of the load/store instruction that is to be matched.
 unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
@@ -17374,6 +17382,10 @@
   if (Subtarget.hasSPE())
     FlagSet |= PPC::MOF_SubtargetSPE;
 
+  // Check if we have a PCRel node and return early.
+  if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
+    return FlagSet;
+
   // Mark this as something we don't want to handle here if it is atomic
   // or pre-increment instruction.
   if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
@@ -17518,6 +17530,14 @@
   // Select an X-Form load if it is not.
   setXFormForUnalignedFI(N, Flags, Mode);
 
+  // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
+  if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
+    assert(Subtarget.isUsingPCRelativeCalls() &&
+           "Must be using PC-Relative calls when a valid PC-Relative node is "
+           "present!");
+    Mode = PPC::AM_PCRel;
+  }
+
   // Set Base and Disp accordingly depending on the address mode.
   switch (Mode) {
   case PPC::AM_DForm:
@@ -17589,6 +17609,12 @@
     Base = N;
     break;
   }
+  case PPC::AM_PCRel: {
+    // When selecting PC-Relative instructions, "Base" is not utilized as
+    // we select the address as [PC+imm].
+    Disp = N;
+    break;
+  }
   case PPC::AM_None:
     break;
   default: { // By default, X-Form is always available to be selected.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1152,15 +1152,13 @@
 /// This is just the offset part of iaddr, used for preinc.
 def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
 
-// PC Relative Address
-def pcreladdr : ComplexPattern<iPTR, 1, "SelectAddrPCRel", [], []>;
-
 // Load and Store Instruction Selection addressing modes.
 def DForm : ComplexPattern<iPTR, 2, "SelectDForm", [], [SDNPWantParent]>;
 def DSForm : ComplexPattern<iPTR, 2, "SelectDSForm", [], [SDNPWantParent]>;
 def DQForm : ComplexPattern<iPTR, 2, "SelectDQForm", [], [SDNPWantParent]>;
 def XForm : ComplexPattern<iPTR, 2, "SelectXForm", [], [SDNPWantParent]>;
 def ForceXForm : ComplexPattern<iPTR, 2, "SelectForceXForm", [], [SDNPWantParent]>;
+def PCRelForm : ComplexPattern<iPTR, 2, "SelectPCRelForm", [], [SDNPWantParent]>;
 
 //===----------------------------------------------------------------------===//
 // PowerPC Instruction Predicate Definitions.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1682,178 +1682,171 @@
   }
 }
 
-// TODO: We have an added complexity of 500 here. This is only a temporary
-// solution to have tablegen consider these patterns first. The way we do
-// addressing for PowerPC is complex depending on available D form, X form, or
-// aligned D form loads/stores like DS and DQ forms. The prefixed
-// instructions in this file also add additional PC Relative loads/stores
-// and D form loads/stores with 34 bit immediates. It is very difficult to force
-// instruction selection to consistently pick these first without the current
-// added complexity. Once pc-relative implementation is complete, a set of
-// follow-up patches will address this refactoring and the AddedComplexity will
-// be removed.
-let Predicates = [PCRelativeMemops], AddedComplexity = 500 in {
+let Predicates = [PCRelativeMemops] in {
   // Load i32
-  def : Pat<(i32 (zextloadi1 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i32 (zextloadi1 (PPCmatpcreladdr PCRelForm:$ga))),
             (PLBZpc $ga, 0)>;
-  def : Pat<(i32 (extloadi1 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i32 (extloadi1 (PPCmatpcreladdr PCRelForm:$ga))),
             (PLBZpc $ga, 0)>;
-  def : Pat<(i32 (zextloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i32 (zextloadi8 (PPCmatpcreladdr PCRelForm:$ga))),
             (PLBZpc $ga, 0)>;
-  def : Pat<(i32 (extloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i32 (extloadi8 (PPCmatpcreladdr PCRelForm:$ga))),
            (PLBZpc $ga, 0)>;
-  def : Pat<(i32 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i32 (sextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
            (PLHApc $ga, 0)>;
-  def : Pat<(i32 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i32 (zextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
            (PLHZpc $ga, 0)>;
-  def : Pat<(i32 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i32 (extloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
            (PLHZpc $ga, 0)>;
-  def : Pat<(i32 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLWZpc $ga, 0)>;
+  def : Pat<(i32 (load (PPCmatpcreladdr PCRelForm:$ga))), (PLWZpc $ga, 0)>;
 
   // Store i32
-  def : Pat<(truncstorei8 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(truncstorei8 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
            (PSTBpc $RS, $ga, 0)>;
-  def : Pat<(truncstorei16 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(truncstorei16 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
            (PSTHpc $RS, $ga, 0)>;
-  def : Pat<(store i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
            (PSTWpc $RS, $ga, 0)>;
 
   // Load i64
-  def : Pat<(i64 (zextloadi1 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (zextloadi1 (PPCmatpcreladdr PCRelForm:$ga))),
            (PLBZ8pc $ga, 0)>;
-  def : Pat<(i64 (extloadi1 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (extloadi1 (PPCmatpcreladdr PCRelForm:$ga))),
            (PLBZ8pc $ga, 0)>;
-  def : Pat<(i64 (zextloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (zextloadi8 (PPCmatpcreladdr PCRelForm:$ga))),
           (PLBZ8pc $ga, 0)>;
-  def : Pat<(i64 (extloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (extloadi8 (PPCmatpcreladdr PCRelForm:$ga))),
           (PLBZ8pc $ga, 0)>;
-  def : Pat<(i64 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (sextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
           (PLHA8pc $ga, 0)>;
-  def : Pat<(i64 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (zextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
           (PLHZ8pc $ga, 0)>;
-  def : Pat<(i64 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (extloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
           (PLHZ8pc $ga, 0)>;
-  def : Pat<(i64 (zextloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (zextloadi32 (PPCmatpcreladdr PCRelForm:$ga))),
           (PLWZ8pc $ga, 0)>;
-  def : Pat<(i64 (sextloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (sextloadi32 (PPCmatpcreladdr PCRelForm:$ga))),
           (PLWA8pc $ga, 0)>;
-  def : Pat<(i64 (extloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (extloadi32 (PPCmatpcreladdr PCRelForm:$ga))),
           (PLWZ8pc $ga, 0)>;
-  def : Pat<(i64 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLDpc $ga, 0)>;
+  def : Pat<(i64 (load (PPCmatpcreladdr PCRelForm:$ga))), (PLDpc $ga, 0)>;
 
   // Store i64
-  def : Pat<(truncstorei8 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(truncstorei8 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
           (PSTB8pc $RS, $ga, 0)>;
-  def : Pat<(truncstorei16 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(truncstorei16 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
           (PSTH8pc $RS, $ga, 0)>;
-  def : Pat<(truncstorei32 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(truncstorei32 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
           (PSTW8pc $RS, $ga, 0)>;
-  def : Pat<(store i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
           (PSTDpc $RS, $ga, 0)>;
 
   // Load f32
-  def : Pat<(f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFSpc $addr, 0)>;
+  def : Pat<(f32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLFSpc $addr, 0)>;
 
   // Store f32
-  def : Pat<(store f32:$FRS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store f32:$FRS, (PPCmatpcreladdr PCRelForm:$ga)),
           (PSTFSpc $FRS, $ga, 0)>;
 
   // Load f64
-  def : Pat<(f64 (extloadf32 (PPCmatpcreladdr pcreladdr:$addr))),
+  def : Pat<(f64 (extloadf32 (PPCmatpcreladdr PCRelForm:$addr))),
           (COPY_TO_REGCLASS (PLFSpc $addr, 0), VSFRC)>;
-  def : Pat<(f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFDpc $addr, 0)>;
+  def : Pat<(f64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLFDpc $addr, 0)>;
 
   // Store f64
-  def : Pat<(store f64:$FRS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store f64:$FRS, (PPCmatpcreladdr PCRelForm:$ga)),
           (PSTFDpc $FRS, $ga, 0)>;
 
   // Load f128
-  def : Pat<(f128 (load (PPCmatpcreladdr pcreladdr:$addr))),
+  def : Pat<(f128 (load (PPCmatpcreladdr PCRelForm:$addr))),
           (COPY_TO_REGCLASS (PLXVpc $addr, 0), VRRC)>;
 
   // Store f128
-  def : Pat<(store f128:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store f128:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
           (PSTXVpc (COPY_TO_REGCLASS $XS, VSRC), $ga, 0)>;
 
   // Load v4i32
-  def : Pat<(v4i32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+  def : Pat<(v4i32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
 
   // Store v4i32
-  def : Pat<(store v4i32:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store v4i32:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
           (PSTXVpc $XS, $ga, 0)>;
 
   // Load v2i64
-  def : Pat<(v2i64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+  def : Pat<(v2i64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
 
   // Store v2i64
-  def : Pat<(store v2i64:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store v2i64:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
           (PSTXVpc $XS, $ga, 0)>;
 
   // Load v4f32
-  def : Pat<(v4f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+  def : Pat<(v4f32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
 
   // Store v4f32
-  def : Pat<(store v4f32:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store v4f32:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
           (PSTXVpc $XS, $ga, 0)>;
 
   // Load v2f64
-  def : Pat<(v2f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+  def : Pat<(v2f64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
 
   // Store v2f64
-  def : Pat<(store v2f64:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store v2f64:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
           (PSTXVpc $XS, $ga, 0)>;
 
   // Atomic Load
-  def : Pat<(atomic_load_8 (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(atomic_load_8 (PPCmatpcreladdr PCRelForm:$ga)),
           (PLBZpc $ga, 0)>;
-  def : Pat<(atomic_load_16 (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(atomic_load_16 (PPCmatpcreladdr PCRelForm:$ga)),
           (PLHZpc $ga, 0)>;
-  def : Pat<(atomic_load_32 (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(atomic_load_32 (PPCmatpcreladdr PCRelForm:$ga)),
          (PLWZpc $ga, 0)>;
-  def : Pat<(atomic_load_64 (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(atomic_load_64 (PPCmatpcreladdr PCRelForm:$ga)),
          (PLDpc $ga, 0)>;
 
   // Atomic Store
-  def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+  def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
          (PSTBpc $RS, $ga, 0)>;
-  def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+  def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
          (PSTHpc $RS, $ga, 0)>;
-  def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+  def : Pat<(atomic_store_32 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
          (PSTWpc $RS, $ga, 0)>;
-  def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+  def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
          (PSTB8pc $RS, $ga, 0)>;
-  def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+  def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
          (PSTH8pc $RS, $ga, 0)>;
-  def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+  def : Pat<(atomic_store_32 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
          (PSTW8pc $RS, $ga, 0)>;
-  def : Pat<(atomic_store_64 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+  def : Pat<(atomic_store_64 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
          (PSTDpc $RS, $ga, 0)>;
 
   // Special Cases For PPCstore_scal_int_from_vsr
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)),
-              (PPCmatpcreladdr pcreladdr:$dst), 8),
+              (PPCmatpcreladdr PCRelForm:$dst), 8),
             (PSTXSDpc (XSCVDPSXDS f64:$src), $dst, 0)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)),
-              (PPCmatpcreladdr pcreladdr:$dst), 8),
+              (PPCmatpcreladdr PCRelForm:$dst), 8),
             (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), $dst, 0)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)),
-              (PPCmatpcreladdr pcreladdr:$dst), 8),
+              (PPCmatpcreladdr PCRelForm:$dst), 8),
             (PSTXSDpc (XSCVDPUXDS f64:$src), $dst, 0)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)),
-              (PPCmatpcreladdr pcreladdr:$dst), 8),
+              (PPCmatpcreladdr PCRelForm:$dst), 8),
             (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), $dst, 0)>;
 
+  def : Pat<(v4f32 (PPCldvsxlh (PPCmatpcreladdr PCRelForm:$addr))),
+            (SUBREG_TO_REG (i64 1), (PLFDpc $addr, 0), sub_64)>;
+
   // If the PPCmatpcreladdr node is not caught by any other pattern it should be
   // caught here and turned into a paddi instruction to materialize the address.
-  def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
+  def : Pat<(PPCmatpcreladdr PCRelForm:$addr), (PADDI8pc 0, $addr)>;
 
   // PPCtlsdynamatpcreladdr node is used for TLS dynamic models to materialize
   // tls global address with paddi instruction.
-  def : Pat<(PPCtlsdynamatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
+  def : Pat<(PPCtlsdynamatpcreladdr PCRelForm:$addr), (PADDI8pc 0, $addr)>;
 
   // PPCtlslocalexecmataddr node is used for TLS local exec models to
   // materialize tls global address with paddi instruction.
   def : Pat<(PPCaddTls i64:$in, (PPCtlslocalexecmataddr tglobaltlsaddr:$addr)),
diff --git a/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll b/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll
--- a/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll
@@ -42,6 +42,7 @@
 @GlobSt11 = dso_local local_unnamed_addr global [20 x <16 x i8>] zeroinitializer, align 16
 @GlobLd12 = dso_local local_unnamed_addr global [20 x <16 x i8>] zeroinitializer, align 16
 @GlobSt12 = dso_local local_unnamed_addr global [20 x <16 x i8>] zeroinitializer, align 16
+@GlobF128 = dso_local local_unnamed_addr global [20 x fp128] zeroinitializer, align 16
 
 ; Function Attrs: nofree norecurse nounwind uwtable willreturn
 define dso_local void @testGlob1PtrPlus0() {
@@ -2270,3 +2271,323 @@
   store <16 x i8> %0, <16 x i8>* %arrayidx1, align 16
   ret void
 }
+
+; Function Attrs: nofree norecurse nounwind uwtable willreturn
+define dso_local void @Atomic_LdSt_i8() {
+; CHECK-P10-LE-LABEL: Atomic_LdSt_i8:
+; CHECK-P10-LE:       # %bb.0: # %entry
+; CHECK-P10-LE-NEXT:    plbz r3, GlobLd1@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    pstb r3, GlobSt1@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: Atomic_LdSt_i8:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    addis r3, r2, GlobLd1@toc@ha
+; CHECK-P10-BE-NEXT:    addis r4, r2, GlobSt1@toc@ha
+; CHECK-P10-BE-NEXT:    lbz r3, GlobLd1@toc@l(r3)
+; CHECK-P10-BE-NEXT:    stb r3, GlobSt1@toc@l(r4)
+; CHECK-P10-BE-NEXT:    blr
+;
+; CHECK-LABEL: Atomic_LdSt_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, GlobLd1@toc@ha
+; CHECK-NEXT:    addis r4, r2, GlobSt1@toc@ha
+; CHECK-NEXT:    lbz r3, GlobLd1@toc@l(r3)
+; CHECK-NEXT:    stb r3, GlobSt1@toc@l(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load atomic i8, i8* getelementptr inbounds ([20 x i8], [20 x i8]* @GlobLd1, i64 0, i64 0) monotonic, align 1
+  store atomic i8 %0, i8* getelementptr inbounds ([20 x i8], [20 x i8]* @GlobSt1, i64 0, i64 0) monotonic, align 1
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind uwtable willreturn
+define dso_local void @Atomic_LdSt_i16() {
+; CHECK-P10-LE-LABEL: Atomic_LdSt_i16:
+; CHECK-P10-LE:       # %bb.0: # %entry
+; CHECK-P10-LE-NEXT:    plhz r3, GlobLd3@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    psth r3, GlobSt3@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: Atomic_LdSt_i16:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    addis r3, r2, GlobLd3@toc@ha
+; CHECK-P10-BE-NEXT:    addis r4, r2, GlobSt3@toc@ha
+; CHECK-P10-BE-NEXT:    lhz r3, GlobLd3@toc@l(r3)
+; CHECK-P10-BE-NEXT:    sth r3, GlobSt3@toc@l(r4)
+; CHECK-P10-BE-NEXT:    blr
+;
+; CHECK-LABEL: Atomic_LdSt_i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, GlobLd3@toc@ha
+; CHECK-NEXT:    addis r4, r2, GlobSt3@toc@ha
+; CHECK-NEXT:    lhz r3, GlobLd3@toc@l(r3)
+; CHECK-NEXT:    sth r3, GlobSt3@toc@l(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load atomic i16, i16* getelementptr inbounds ([20 x i16], [20 x i16]* @GlobLd3, i64 0, i64 0) monotonic, align 2
+  store atomic i16 %0, i16* getelementptr inbounds ([20 x i16], [20 x i16]* @GlobSt3, i64 0, i64 0) monotonic, align 2
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind uwtable willreturn
+define dso_local void @Atomic_LdSt_i32() {
+; CHECK-P10-LE-LABEL: Atomic_LdSt_i32:
+; CHECK-P10-LE:       # %bb.0: # %entry
+; CHECK-P10-LE-NEXT:    plwz r3, GlobLd5@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    pstw r3, GlobSt5@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: Atomic_LdSt_i32:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    addis r3, r2, GlobLd5@toc@ha
+; CHECK-P10-BE-NEXT:    addis r4, r2, GlobSt5@toc@ha
+; CHECK-P10-BE-NEXT:    lwz r3, GlobLd5@toc@l(r3)
+; CHECK-P10-BE-NEXT:    stw r3, GlobSt5@toc@l(r4)
+; CHECK-P10-BE-NEXT:    blr
+;
+; CHECK-LABEL: Atomic_LdSt_i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, GlobLd5@toc@ha
+; CHECK-NEXT:    addis r4, r2, GlobSt5@toc@ha
+; CHECK-NEXT:    lwz r3, GlobLd5@toc@l(r3)
+; CHECK-NEXT:    stw r3, GlobSt5@toc@l(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load atomic i32, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @GlobLd5, i64 0, i64 0) monotonic, align 4
+  store atomic i32 %0, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @GlobSt5, i64 0, i64 0) monotonic, align 4
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind uwtable willreturn
+define dso_local void @Atomic_LdSt_i64() {
+; CHECK-P10-LE-LABEL: Atomic_LdSt_i64:
+; CHECK-P10-LE:       # %bb.0: # %entry
+; CHECK-P10-LE-NEXT:    pld r3, GlobLd7@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    pstd r3, GlobSt7@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: Atomic_LdSt_i64:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    addis r3, r2, GlobLd7@toc@ha
+; CHECK-P10-BE-NEXT:    addis r4, r2, GlobSt7@toc@ha
+; CHECK-P10-BE-NEXT:    ld r3, GlobLd7@toc@l(r3)
+; CHECK-P10-BE-NEXT:    std r3, GlobSt7@toc@l(r4)
+; CHECK-P10-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: Atomic_LdSt_i64:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r3, r2, GlobLd7@toc@ha
+; CHECK-P9-NEXT:    addis r4, r2, GlobSt7@toc@ha
+; CHECK-P9-NEXT:    ld r3, GlobLd7@toc@l(r3)
+; CHECK-P9-NEXT:    std r3, GlobSt7@toc@l(r4)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: Atomic_LdSt_i64:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r3, r2, GlobLd7@toc@ha
+; CHECK-P8-NEXT:    ld r3, GlobLd7@toc@l(r3)
+; CHECK-P8-NEXT:    addis r4, r2, GlobSt7@toc@ha
+; CHECK-P8-NEXT:    std r3, GlobSt7@toc@l(r4)
+; CHECK-P8-NEXT:    blr
+entry:
+  %0 = load atomic i64, i64* getelementptr inbounds ([20 x i64], [20 x i64]* @GlobLd7, i64 0, i64 0) monotonic, align 8
+  store atomic i64 %0, i64* getelementptr inbounds ([20 x i64], [20 x i64]* @GlobSt7, i64 0, i64 0) monotonic, align 8
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly
+define dso_local void @store_double_f64_to_uint(double %str) local_unnamed_addr #0 {
+; CHECK-P10-LE-LABEL: store_double_f64_to_uint:
+; CHECK-P10-LE:       # %bb.0: # %entry
+; CHECK-P10-LE-NEXT:    xscvdpuxds v2, f1
+; CHECK-P10-LE-NEXT:    pstxsd v2, GlobSt10@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: store_double_f64_to_uint:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    xscvdpuxds v2, f1
+; CHECK-P10-BE-NEXT:    addis r3, r2, GlobSt10@toc@ha
+; CHECK-P10-BE-NEXT:    addi r3, r3, GlobSt10@toc@l
+; CHECK-P10-BE-NEXT:    stxsd v2, 0(r3)
+; CHECK-P10-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: store_double_f64_to_uint:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xscvdpuxds v2, f1
+; CHECK-P9-NEXT:    addis r3, r2, GlobSt10@toc@ha
+; CHECK-P9-NEXT:    addi r3, r3, GlobSt10@toc@l
+; CHECK-P9-NEXT:    stxsd v2, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: store_double_f64_to_uint:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xscvdpuxds f0, f1
+; CHECK-P8-NEXT:    addis r3, r2, GlobSt10@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, GlobSt10@toc@l
+; CHECK-P8-NEXT:    stxsdx f0, 0, r3
+; CHECK-P8-NEXT:    blr
+entry:
+  %conv = fptoui double %str to i64
+  store i64 %conv, i64* bitcast ([20 x double]* @GlobSt10 to i64*), align 8
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly
+define dso_local void @store_double_f64_to_sint(double %str) local_unnamed_addr #0 {
+; CHECK-P10-LE-LABEL: store_double_f64_to_sint:
+; CHECK-P10-LE:       # %bb.0: # %entry
+; CHECK-P10-LE-NEXT:    xscvdpsxds v2, f1
+; CHECK-P10-LE-NEXT:    pstxsd v2, GlobSt10@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: store_double_f64_to_sint:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    xscvdpsxds v2, f1
+; CHECK-P10-BE-NEXT:    addis r3, r2, GlobSt10@toc@ha
+; CHECK-P10-BE-NEXT:    addi r3, r3, GlobSt10@toc@l
+; CHECK-P10-BE-NEXT:    stxsd v2, 0(r3)
+; CHECK-P10-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: store_double_f64_to_sint:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xscvdpsxds v2, f1
+; CHECK-P9-NEXT:    addis r3, r2, GlobSt10@toc@ha
+; CHECK-P9-NEXT:    addi r3, r3, GlobSt10@toc@l
+; CHECK-P9-NEXT:    stxsd v2, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: store_double_f64_to_sint:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xscvdpsxds f0, f1
+; CHECK-P8-NEXT:    addis r3, r2, GlobSt10@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, GlobSt10@toc@l
+; CHECK-P8-NEXT:    stxsdx f0, 0, r3
+; CHECK-P8-NEXT:    blr
+entry:
+  %conv = fptosi double %str to i64
+  store i64 %conv, i64* bitcast ([20 x double]* @GlobSt10 to i64*), align 8
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly
+define dso_local void @store_f128_to_uint(fp128 %str) local_unnamed_addr #0 {
+; CHECK-P10-LE-LABEL: store_f128_to_uint:
+; CHECK-P10-LE:       # %bb.0: # %entry
+; CHECK-P10-LE-NEXT:    xscvqpudz v2, v2
+; CHECK-P10-LE-NEXT:    pstxsd v2, GlobF128@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: store_f128_to_uint:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    xscvqpudz v2, v2
+; CHECK-P10-BE-NEXT:    addis r3, r2, GlobF128@toc@ha
+; CHECK-P10-BE-NEXT:    addi r3, r3, GlobF128@toc@l
+; CHECK-P10-BE-NEXT:    stxsd v2, 0(r3)
+; CHECK-P10-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: store_f128_to_uint:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xscvqpudz v2, v2
+; CHECK-P9-NEXT:    addis r3, r2, GlobF128@toc@ha
+; CHECK-P9-NEXT:    addi r3, r3, GlobF128@toc@l
+; CHECK-P9-NEXT:    stxsd v2, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LE-LABEL: store_f128_to_uint:
+; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    mflr r0
+; CHECK-P8-LE-NEXT:    std r0, 16(r1)
+; CHECK-P8-LE-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-LE-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-P8-LE-NEXT:    .cfi_offset lr, 16
+; CHECK-P8-LE-NEXT:    bl __fixunskfdi
+; CHECK-P8-LE-NEXT:    nop
+; CHECK-P8-LE-NEXT:    addis r4, r2, GlobF128@toc@ha
+; CHECK-P8-LE-NEXT:    std r3, GlobF128@toc@l(r4)
+; CHECK-P8-LE-NEXT:    addi r1, r1, 32
+; CHECK-P8-LE-NEXT:    ld r0, 16(r1)
+; CHECK-P8-LE-NEXT:    mtlr r0
+; CHECK-P8-LE-NEXT:    blr
+;
+; CHECK-P8-BE-LABEL: store_f128_to_uint:
+; CHECK-P8-BE:       # %bb.0: # %entry
+; CHECK-P8-BE-NEXT:    mflr r0
+; CHECK-P8-BE-NEXT:    std r0, 16(r1)
+; CHECK-P8-BE-NEXT:    stdu r1, -112(r1)
+; CHECK-P8-BE-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-P8-BE-NEXT:    .cfi_offset lr, 16
+; CHECK-P8-BE-NEXT:    bl __fixunskfdi
+; CHECK-P8-BE-NEXT:    nop
+; CHECK-P8-BE-NEXT:    addis r4, r2, GlobF128@toc@ha
+; CHECK-P8-BE-NEXT:    std r3, GlobF128@toc@l(r4)
+; CHECK-P8-BE-NEXT:    addi r1, r1, 112
+; CHECK-P8-BE-NEXT:    ld r0, 16(r1)
+; CHECK-P8-BE-NEXT:    mtlr r0
+; CHECK-P8-BE-NEXT:    blr
+entry:
+  %conv = fptoui fp128 %str to i64
+  store i64 %conv, i64* bitcast ([20 x fp128]* @GlobF128 to i64*), align 16
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly
+define dso_local void @store_f128_to_sint(fp128 %str) local_unnamed_addr #0 {
+; CHECK-P10-LE-LABEL: store_f128_to_sint:
+; CHECK-P10-LE:       # %bb.0: # %entry
+; CHECK-P10-LE-NEXT:    xscvqpsdz v2, v2
+; CHECK-P10-LE-NEXT:    pstxsd v2, GlobF128@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: store_f128_to_sint:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    xscvqpsdz v2, v2
+; CHECK-P10-BE-NEXT:    addis r3, r2, GlobF128@toc@ha
+; CHECK-P10-BE-NEXT:    addi r3, r3, GlobF128@toc@l
+; CHECK-P10-BE-NEXT:    stxsd v2, 0(r3)
+; CHECK-P10-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: store_f128_to_sint:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xscvqpsdz v2, v2
+; CHECK-P9-NEXT:    addis r3, r2, GlobF128@toc@ha
+; CHECK-P9-NEXT:    addi r3, r3, GlobF128@toc@l
+; CHECK-P9-NEXT:    stxsd v2, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LE-LABEL: store_f128_to_sint:
+; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    mflr r0
+; CHECK-P8-LE-NEXT:    std r0, 16(r1)
+; CHECK-P8-LE-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-LE-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-P8-LE-NEXT:    .cfi_offset lr, 16
+; CHECK-P8-LE-NEXT:    bl __fixkfdi
+; CHECK-P8-LE-NEXT:    nop
+; CHECK-P8-LE-NEXT:    addis r4, r2, GlobF128@toc@ha
+; CHECK-P8-LE-NEXT:    std r3, GlobF128@toc@l(r4)
+; CHECK-P8-LE-NEXT:    addi r1, r1, 32
+; CHECK-P8-LE-NEXT:    ld r0, 16(r1)
+; CHECK-P8-LE-NEXT:    mtlr r0
+; CHECK-P8-LE-NEXT:    blr
+;
+; CHECK-P8-BE-LABEL: store_f128_to_sint:
+; CHECK-P8-BE:       # %bb.0: # %entry
+; CHECK-P8-BE-NEXT:    mflr r0
+; CHECK-P8-BE-NEXT:    std r0, 16(r1)
+; CHECK-P8-BE-NEXT:    stdu r1, -112(r1)
+; CHECK-P8-BE-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-P8-BE-NEXT:    .cfi_offset lr, 16
+; CHECK-P8-BE-NEXT:    bl __fixkfdi
+; CHECK-P8-BE-NEXT:    nop
+; CHECK-P8-BE-NEXT:    addis r4, r2, GlobF128@toc@ha
+; CHECK-P8-BE-NEXT:    std r3, GlobF128@toc@l(r4)
+; CHECK-P8-BE-NEXT:    addi r1, r1, 112
+; CHECK-P8-BE-NEXT:    ld r0, 16(r1)
+; CHECK-P8-BE-NEXT:    mtlr r0
+; CHECK-P8-BE-NEXT:    blr
+entry:
+  %conv = fptosi fp128 %str to i64
+  store i64 %conv, i64* bitcast ([20 x fp128]* @GlobF128 to i64*), align 16
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
--- a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
+++ b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
@@ -5,6 +5,12 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
 ; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names \
 ; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P10
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P10-BE
 
 ; Function Attrs: norecurse nounwind readonly
 define dso_local <2 x double> @test1(<2 x float>* nocapture readonly %Ptr) {
@@ -75,3 +81,30 @@
   %2 = fpext <2 x float> %sub to <2 x double>
   ret <2 x double> %2
 }
+
+@G = dso_local local_unnamed_addr global <2 x float> , align 8
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind readonly uwtable willreturn
+define dso_local <2 x double> @test5(<2 x double> %a) {
+; CHECK-P10-LABEL: test5:
+; CHECK-P10:       # %bb.0: # %entry
+; CHECK-P10-NEXT:    plfd f0, G@PCREL(0), 1
+; CHECK-P10-NEXT:    xxmrghw vs0, vs0, vs0
+; CHECK-P10-NEXT:    xvcvspdp vs0, vs0
+; CHECK-P10-NEXT:    xvadddp v2, vs0, v2
+; CHECK-P10-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: test5:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    addis r3, r2, G@toc@ha
+; CHECK-P10-BE-NEXT:    lfd f0, G@toc@l(r3)
+; CHECK-P10-BE-NEXT:    xxmrghw vs0, vs0, vs0
+; CHECK-P10-BE-NEXT:    xvcvspdp vs0, vs0
+; CHECK-P10-BE-NEXT:    xvadddp v2, vs0, v2
+; CHECK-P10-BE-NEXT:    blr
+entry:
+  %0 = load <2 x float>, <2 x float>* @G, align 8
+  %1 = fpext <2 x float> %0 to <2 x double>
+  %add = fadd <2 x double> %1, %a
+  ret <2 x double> %add
+}