diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -253,6 +253,14 @@ None) == PPC::AM_DForm; } + /// SelectPCRelForm - Returns true if address N can be represented by + /// PC-Relative addressing mode. + bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp, + SDValue &Base) { + return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, + None) == PPC::AM_PCRel; + } + /// SelectXForm - Returns true if address N can be represented by the /// addressing mode of XForm instructions (an indexed [r+r] operation). bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -713,6 +713,7 @@ AM_DSForm, AM_DQForm, AM_XForm, + AM_PCRel }; } // end namespace PPC diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1509,6 +1509,7 @@ PPC::MOF_NotAddNorCst | PPC::MOF_Vector256 | PPC::MOF_SubtargetP10, PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector256 | PPC::MOF_SubtargetP10, }; + AddrModesMap[PPC::AM_PCRel] = {PPC::MOF_PCRel | PPC::MOF_SubtargetP10}; } /// getMaxByValAlign - Helper for getByValTypeAlignment to determine @@ -16882,6 +16883,9 @@ for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm)) if ((Flags & FlagSet) == FlagSet) return PPC::AM_DQForm; + for (auto FlagSet : AddrModesMap.at(PPC::AM_PCRel)) + if ((Flags & FlagSet) == FlagSet) + return PPC::AM_PCRel; // If no other forms are selected, return an X-Form as it is the most // general addressing mode. 
return PPC::AM_XForm; @@ -16989,6 +16993,19 @@ if (Subtarget.hasSPE()) FlagSet |= PPC::MOF_SubtargetSPE; + // Adding Power10 specific flags. + if (FlagSet & PPC::MOF_SubtargetP10) { + // Check if we have a PCRel node. If so, add the PCRel flag, return early. + if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR || + isValidPCRelNode<ConstantPoolSDNode>(N) || + isValidPCRelNode<GlobalAddressSDNode>(N) || + isValidPCRelNode<JumpTableSDNode>(N) || + isValidPCRelNode<BlockAddressSDNode>(N)) { + FlagSet |= PPC::MOF_PCRel; + return FlagSet; + } + } + // Mark this as something we don't want to handle here if it is atomic // or pre-increment instruction. if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent)) @@ -17184,6 +17201,10 @@ Base = N; break; } + case PPC::AM_PCRel: { + Disp = N; + break; + } case PPC::AM_None: break; default: { // By default, X-Form is always available to be selected. diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1149,6 +1149,7 @@ def DQForm : ComplexPattern<iPTR, 2, "SelectDQForm", [], [SDNPWantParent]>; def XForm : ComplexPattern<iPTR, 2, "SelectXForm", [], [SDNPWantParent]>; def ForceXForm : ComplexPattern<iPTR, 2, "SelectForceXForm", [], [SDNPWantParent]>; +def PCRelForm : ComplexPattern<iPTR, 2, "SelectPCRelForm", [], [SDNPWantParent]>; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -1679,178 +1679,168 @@ } } -// TODO: We have an added complexity of 500 here. This is only a temporary -// solution to have tablegen consider these patterns first. The way we do -// addressing for PowerPC is complex depending on available D form, X form, or -// aligned D form loads/stores like DS and DQ forms. The prefixed -// instructions in this file also add additional PC Relative loads/stores -// and D form loads/stores with 34 bit immediates. 
It is very difficult to force -// instruction selection to consistently pick these first without the current -// added complexity. Once pc-relative implementation is complete, a set of -// follow-up patches will address this refactoring and the AddedComplexity will -// be removed. -let Predicates = [PCRelativeMemops], AddedComplexity = 500 in { +let Predicates = [PCRelativeMemops] in { // Load i32 - def : Pat<(i32 (zextloadi1 (PPCmatpcreladdr pcreladdr:$ga))), + def : Pat<(i32 (zextloadi1 (PPCmatpcreladdr PCRelForm:$ga))), (PLBZpc $ga, 0)>; - def : Pat<(i32 (extloadi1 (PPCmatpcreladdr pcreladdr:$ga))), + def : Pat<(i32 (extloadi1 (PPCmatpcreladdr PCRelForm:$ga))), (PLBZpc $ga, 0)>; - def : Pat<(i32 (zextloadi8 (PPCmatpcreladdr pcreladdr:$ga))), + def : Pat<(i32 (zextloadi8 (PPCmatpcreladdr PCRelForm:$ga))), (PLBZpc $ga, 0)>; - def : Pat<(i32 (extloadi8 (PPCmatpcreladdr pcreladdr:$ga))), + def : Pat<(i32 (extloadi8 (PPCmatpcreladdr PCRelForm:$ga))), (PLBZpc $ga, 0)>; - def : Pat<(i32 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))), + def : Pat<(i32 (sextloadi16 (PPCmatpcreladdr PCRelForm:$ga))), (PLHApc $ga, 0)>; - def : Pat<(i32 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))), + def : Pat<(i32 (zextloadi16 (PPCmatpcreladdr PCRelForm:$ga))), (PLHZpc $ga, 0)>; - def : Pat<(i32 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))), + def : Pat<(i32 (extloadi16 (PPCmatpcreladdr PCRelForm:$ga))), (PLHZpc $ga, 0)>; - def : Pat<(i32 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLWZpc $ga, 0)>; + def : Pat<(i32 (load (PPCmatpcreladdr PCRelForm:$ga))), (PLWZpc $ga, 0)>; // Store i32 - def : Pat<(truncstorei8 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(truncstorei8 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)), (PSTBpc $RS, $ga, 0)>; - def : Pat<(truncstorei16 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(truncstorei16 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)), (PSTHpc $RS, $ga, 0)>; - def : Pat<(store i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(store 
i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)), (PSTWpc $RS, $ga, 0)>; // Load i64 - def : Pat<(i64 (zextloadi1 (PPCmatpcreladdr pcreladdr:$ga))), + def : Pat<(i64 (zextloadi1 (PPCmatpcreladdr PCRelForm:$ga))), (PLBZ8pc $ga, 0)>; - def : Pat<(i64 (extloadi1 (PPCmatpcreladdr pcreladdr:$ga))), + def : Pat<(i64 (extloadi1 (PPCmatpcreladdr PCRelForm:$ga))), (PLBZ8pc $ga, 0)>; - def : Pat<(i64 (zextloadi8 (PPCmatpcreladdr pcreladdr:$ga))), + def : Pat<(i64 (zextloadi8 (PPCmatpcreladdr PCRelForm:$ga))), (PLBZ8pc $ga, 0)>; - def : Pat<(i64 (extloadi8 (PPCmatpcreladdr pcreladdr:$ga))), + def : Pat<(i64 (extloadi8 (PPCmatpcreladdr PCRelForm:$ga))), (PLBZ8pc $ga, 0)>; - def : Pat<(i64 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))), + def : Pat<(i64 (sextloadi16 (PPCmatpcreladdr PCRelForm:$ga))), (PLHA8pc $ga, 0)>; - def : Pat<(i64 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))), + def : Pat<(i64 (zextloadi16 (PPCmatpcreladdr PCRelForm:$ga))), (PLHZ8pc $ga, 0)>; - def : Pat<(i64 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))), + def : Pat<(i64 (extloadi16 (PPCmatpcreladdr PCRelForm:$ga))), (PLHZ8pc $ga, 0)>; - def : Pat<(i64 (zextloadi32 (PPCmatpcreladdr pcreladdr:$ga))), + def : Pat<(i64 (zextloadi32 (PPCmatpcreladdr PCRelForm:$ga))), (PLWZ8pc $ga, 0)>; - def : Pat<(i64 (sextloadi32 (PPCmatpcreladdr pcreladdr:$ga))), + def : Pat<(i64 (sextloadi32 (PPCmatpcreladdr PCRelForm:$ga))), (PLWA8pc $ga, 0)>; - def : Pat<(i64 (extloadi32 (PPCmatpcreladdr pcreladdr:$ga))), + def : Pat<(i64 (extloadi32 (PPCmatpcreladdr PCRelForm:$ga))), (PLWZ8pc $ga, 0)>; - def : Pat<(i64 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLDpc $ga, 0)>; + def : Pat<(i64 (load (PPCmatpcreladdr PCRelForm:$ga))), (PLDpc $ga, 0)>; // Store i64 - def : Pat<(truncstorei8 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(truncstorei8 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)), (PSTB8pc $RS, $ga, 0)>; - def : Pat<(truncstorei16 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(truncstorei16 i64:$RS, (PPCmatpcreladdr 
PCRelForm:$ga)), (PSTH8pc $RS, $ga, 0)>; - def : Pat<(truncstorei32 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(truncstorei32 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)), (PSTW8pc $RS, $ga, 0)>; - def : Pat<(store i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(store i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)), (PSTDpc $RS, $ga, 0)>; // Load f32 - def : Pat<(f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFSpc $addr, 0)>; + def : Pat<(f32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLFSpc $addr, 0)>; // Store f32 - def : Pat<(store f32:$FRS, (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(store f32:$FRS, (PPCmatpcreladdr PCRelForm:$ga)), (PSTFSpc $FRS, $ga, 0)>; // Load f64 - def : Pat<(f64 (extloadf32 (PPCmatpcreladdr pcreladdr:$addr))), + def : Pat<(f64 (extloadf32 (PPCmatpcreladdr PCRelForm:$addr))), (COPY_TO_REGCLASS (PLFSpc $addr, 0), VSFRC)>; - def : Pat<(f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFDpc $addr, 0)>; + def : Pat<(f64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLFDpc $addr, 0)>; // Store f64 - def : Pat<(store f64:$FRS, (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(store f64:$FRS, (PPCmatpcreladdr PCRelForm:$ga)), (PSTFDpc $FRS, $ga, 0)>; // Load f128 - def : Pat<(f128 (load (PPCmatpcreladdr pcreladdr:$addr))), + def : Pat<(f128 (load (PPCmatpcreladdr PCRelForm:$addr))), (COPY_TO_REGCLASS (PLXVpc $addr, 0), VRRC)>; // Store f128 - def : Pat<(store f128:$XS, (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(store f128:$XS, (PPCmatpcreladdr PCRelForm:$ga)), (PSTXVpc (COPY_TO_REGCLASS $XS, VSRC), $ga, 0)>; // Load v4i32 - def : Pat<(v4i32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>; + def : Pat<(v4i32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>; // Store v4i32 - def : Pat<(store v4i32:$XS, (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(store v4i32:$XS, (PPCmatpcreladdr PCRelForm:$ga)), (PSTXVpc $XS, $ga, 0)>; // Load v2i64 - def : Pat<(v2i64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc 
$addr, 0)>; + def : Pat<(v2i64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>; // Store v2i64 - def : Pat<(store v2i64:$XS, (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(store v2i64:$XS, (PPCmatpcreladdr PCRelForm:$ga)), (PSTXVpc $XS, $ga, 0)>; // Load v4f32 - def : Pat<(v4f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>; + def : Pat<(v4f32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>; // Store v4f32 - def : Pat<(store v4f32:$XS, (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(store v4f32:$XS, (PPCmatpcreladdr PCRelForm:$ga)), (PSTXVpc $XS, $ga, 0)>; // Load v2f64 - def : Pat<(v2f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>; + def : Pat<(v2f64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>; // Store v2f64 - def : Pat<(store v2f64:$XS, (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(store v2f64:$XS, (PPCmatpcreladdr PCRelForm:$ga)), (PSTXVpc $XS, $ga, 0)>; // Atomic Load - def : Pat<(atomic_load_8 (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(atomic_load_8 (PPCmatpcreladdr PCRelForm:$ga)), (PLBZpc $ga, 0)>; - def : Pat<(atomic_load_16 (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(atomic_load_16 (PPCmatpcreladdr PCRelForm:$ga)), (PLHZpc $ga, 0)>; - def : Pat<(atomic_load_32 (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(atomic_load_32 (PPCmatpcreladdr PCRelForm:$ga)), (PLWZpc $ga, 0)>; - def : Pat<(atomic_load_64 (PPCmatpcreladdr pcreladdr:$ga)), + def : Pat<(atomic_load_64 (PPCmatpcreladdr PCRelForm:$ga)), (PLDpc $ga, 0)>; // Atomic Store - def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS), + def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS), (PSTBpc $RS, $ga, 0)>; - def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS), + def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS), (PSTHpc $RS, $ga, 0)>; - def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS), + def : Pat<(atomic_store_32 (PPCmatpcreladdr 
PCRelForm:$ga), i32:$RS), (PSTWpc $RS, $ga, 0)>; - def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS), + def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS), (PSTB8pc $RS, $ga, 0)>; - def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS), + def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS), (PSTH8pc $RS, $ga, 0)>; - def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS), + def : Pat<(atomic_store_32 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS), (PSTW8pc $RS, $ga, 0)>; - def : Pat<(atomic_store_64 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS), + def : Pat<(atomic_store_64 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS), (PSTDpc $RS, $ga, 0)>; // Special Cases For PPCstore_scal_int_from_vsr def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), - (PPCmatpcreladdr pcreladdr:$dst), 8), + (PPCmatpcreladdr PCRelForm:$dst), 8), (PSTXSDpc (XSCVDPSXDS f64:$src), $dst, 0)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), - (PPCmatpcreladdr pcreladdr:$dst), 8), + (PPCmatpcreladdr PCRelForm:$dst), 8), (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), $dst, 0)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), - (PPCmatpcreladdr pcreladdr:$dst), 8), + (PPCmatpcreladdr PCRelForm:$dst), 8), (PSTXSDpc (XSCVDPUXDS f64:$src), $dst, 0)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), - (PPCmatpcreladdr pcreladdr:$dst), 8), + (PPCmatpcreladdr PCRelForm:$dst), 8), (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), $dst, 0)>; // If the PPCmatpcreladdr node is not caught by any other pattern it should be // caught here and turned into a paddi instruction to materialize the address. 
- def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>; + def : Pat<(PPCmatpcreladdr PCRelForm:$addr), (PADDI8pc 0, $addr)>; // PPCtlsdynamatpcreladdr node is used for TLS dynamic models to materialize // tls global address with paddi instruction. - def : Pat<(PPCtlsdynamatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>; + def : Pat<(PPCtlsdynamatpcreladdr PCRelForm:$addr), (PADDI8pc 0, $addr)>; // PPCtlslocalexecmataddr node is used for TLS local exec models to // materialize tls global address with paddi instruction. def : Pat<(PPCaddTls i64:$in, (PPCtlslocalexecmataddr tglobaltlsaddr:$addr)), diff --git a/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll b/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll --- a/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll @@ -42,6 +42,7 @@ @GlobSt11 = dso_local local_unnamed_addr global [20 x <16 x i8>] zeroinitializer, align 16 @GlobLd12 = dso_local local_unnamed_addr global [20 x <16 x i8>] zeroinitializer, align 16 @GlobSt12 = dso_local local_unnamed_addr global [20 x <16 x i8>] zeroinitializer, align 16 +@GlobF128 = dso_local local_unnamed_addr global [20 x fp128] zeroinitializer, align 16 ; Function Attrs: nofree norecurse nounwind uwtable willreturn define dso_local void @testGlob1PtrPlus0() { @@ -2270,3 +2271,323 @@ store <16 x i8> %0, <16 x i8>* %arrayidx1, align 16 ret void } + +; Function Attrs: nofree norecurse nounwind uwtable willreturn +define dso_local void @Atomic_LdSt_i8() { +; CHECK-P10-LE-LABEL: Atomic_LdSt_i8: +; CHECK-P10-LE: # %bb.0: # %entry +; CHECK-P10-LE-NEXT: plbz r3, GlobLd1@PCREL(0), 1 +; CHECK-P10-LE-NEXT: pstb r3, GlobSt1@PCREL(0), 1 +; CHECK-P10-LE-NEXT: blr +; +; CHECK-P10-BE-LABEL: Atomic_LdSt_i8: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: addis r3, r2, GlobLd1@toc@ha +; CHECK-P10-BE-NEXT: addis r4, r2, GlobSt1@toc@ha +; CHECK-P10-BE-NEXT: lbz r3, GlobLd1@toc@l(r3) +; CHECK-P10-BE-NEXT: stb r3, GlobSt1@toc@l(r4) +; CHECK-P10-BE-NEXT: blr +; +; 
CHECK-LABEL: Atomic_LdSt_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis r3, r2, GlobLd1@toc@ha +; CHECK-NEXT: addis r4, r2, GlobSt1@toc@ha +; CHECK-NEXT: lbz r3, GlobLd1@toc@l(r3) +; CHECK-NEXT: stb r3, GlobSt1@toc@l(r4) +; CHECK-NEXT: blr +entry: + %0 = load atomic i8, i8* getelementptr inbounds ([20 x i8], [20 x i8]* @GlobLd1, i64 0, i64 0) monotonic, align 1 + store atomic i8 %0, i8* getelementptr inbounds ([20 x i8], [20 x i8]* @GlobSt1, i64 0, i64 0) monotonic, align 1 + ret void +} + +; Function Attrs: nofree norecurse nounwind uwtable willreturn +define dso_local void @Atomic_LdSt_i16() { +; CHECK-P10-LE-LABEL: Atomic_LdSt_i16: +; CHECK-P10-LE: # %bb.0: # %entry +; CHECK-P10-LE-NEXT: plhz r3, GlobLd3@PCREL(0), 1 +; CHECK-P10-LE-NEXT: psth r3, GlobSt3@PCREL(0), 1 +; CHECK-P10-LE-NEXT: blr +; +; CHECK-P10-BE-LABEL: Atomic_LdSt_i16: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: addis r3, r2, GlobLd3@toc@ha +; CHECK-P10-BE-NEXT: addis r4, r2, GlobSt3@toc@ha +; CHECK-P10-BE-NEXT: lhz r3, GlobLd3@toc@l(r3) +; CHECK-P10-BE-NEXT: sth r3, GlobSt3@toc@l(r4) +; CHECK-P10-BE-NEXT: blr +; +; CHECK-LABEL: Atomic_LdSt_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis r3, r2, GlobLd3@toc@ha +; CHECK-NEXT: addis r4, r2, GlobSt3@toc@ha +; CHECK-NEXT: lhz r3, GlobLd3@toc@l(r3) +; CHECK-NEXT: sth r3, GlobSt3@toc@l(r4) +; CHECK-NEXT: blr +entry: + %0 = load atomic i16, i16* getelementptr inbounds ([20 x i16], [20 x i16]* @GlobLd3, i64 0, i64 0) monotonic, align 2 + store atomic i16 %0, i16* getelementptr inbounds ([20 x i16], [20 x i16]* @GlobSt3, i64 0, i64 0) monotonic, align 2 + ret void +} + +; Function Attrs: nofree norecurse nounwind uwtable willreturn +define dso_local void @Atomic_LdSt_i32() { +; CHECK-P10-LE-LABEL: Atomic_LdSt_i32: +; CHECK-P10-LE: # %bb.0: # %entry +; CHECK-P10-LE-NEXT: plwz r3, GlobLd5@PCREL(0), 1 +; CHECK-P10-LE-NEXT: pstw r3, GlobSt5@PCREL(0), 1 +; CHECK-P10-LE-NEXT: blr +; +; CHECK-P10-BE-LABEL: Atomic_LdSt_i32: +; 
CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: addis r3, r2, GlobLd5@toc@ha +; CHECK-P10-BE-NEXT: addis r4, r2, GlobSt5@toc@ha +; CHECK-P10-BE-NEXT: lwz r3, GlobLd5@toc@l(r3) +; CHECK-P10-BE-NEXT: stw r3, GlobSt5@toc@l(r4) +; CHECK-P10-BE-NEXT: blr +; +; CHECK-LABEL: Atomic_LdSt_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis r3, r2, GlobLd5@toc@ha +; CHECK-NEXT: addis r4, r2, GlobSt5@toc@ha +; CHECK-NEXT: lwz r3, GlobLd5@toc@l(r3) +; CHECK-NEXT: stw r3, GlobSt5@toc@l(r4) +; CHECK-NEXT: blr +entry: + %0 = load atomic i32, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @GlobLd5, i64 0, i64 0) monotonic, align 4 + store atomic i32 %0, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @GlobSt5, i64 0, i64 0) monotonic, align 4 + ret void +} + +; Function Attrs: nofree norecurse nounwind uwtable willreturn +define dso_local void @Atomic_LdSt_i64() { +; CHECK-P10-LE-LABEL: Atomic_LdSt_i64: +; CHECK-P10-LE: # %bb.0: # %entry +; CHECK-P10-LE-NEXT: pld r3, GlobLd7@PCREL(0), 1 +; CHECK-P10-LE-NEXT: pstd r3, GlobSt7@PCREL(0), 1 +; CHECK-P10-LE-NEXT: blr +; +; CHECK-P10-BE-LABEL: Atomic_LdSt_i64: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: addis r3, r2, GlobLd7@toc@ha +; CHECK-P10-BE-NEXT: addis r4, r2, GlobSt7@toc@ha +; CHECK-P10-BE-NEXT: ld r3, GlobLd7@toc@l(r3) +; CHECK-P10-BE-NEXT: std r3, GlobSt7@toc@l(r4) +; CHECK-P10-BE-NEXT: blr +; +; CHECK-P9-LABEL: Atomic_LdSt_i64: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: addis r3, r2, GlobLd7@toc@ha +; CHECK-P9-NEXT: addis r4, r2, GlobSt7@toc@ha +; CHECK-P9-NEXT: ld r3, GlobLd7@toc@l(r3) +; CHECK-P9-NEXT: std r3, GlobSt7@toc@l(r4) +; CHECK-P9-NEXT: blr +; +; CHECK-P8-LABEL: Atomic_LdSt_i64: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: addis r3, r2, GlobLd7@toc@ha +; CHECK-P8-NEXT: ld r3, GlobLd7@toc@l(r3) +; CHECK-P8-NEXT: addis r4, r2, GlobSt7@toc@ha +; CHECK-P8-NEXT: std r3, GlobSt7@toc@l(r4) +; CHECK-P8-NEXT: blr +entry: + %0 = load atomic i64, i64* getelementptr inbounds ([20 x 
i64], [20 x i64]* @GlobLd7, i64 0, i64 0) monotonic, align 8 + store atomic i64 %0, i64* getelementptr inbounds ([20 x i64], [20 x i64]* @GlobSt7, i64 0, i64 0) monotonic, align 8 + ret void +} + +; Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly +define dso_local void @store_double_f64_to_uint(double %str) local_unnamed_addr #0 { +; CHECK-P10-LE-LABEL: store_double_f64_to_uint: +; CHECK-P10-LE: # %bb.0: # %entry +; CHECK-P10-LE-NEXT: xscvdpuxds v2, f1 +; CHECK-P10-LE-NEXT: pstxsd v2, GlobSt10@PCREL(0), 1 +; CHECK-P10-LE-NEXT: blr +; +; CHECK-P10-BE-LABEL: store_double_f64_to_uint: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xscvdpuxds v2, f1 +; CHECK-P10-BE-NEXT: addis r3, r2, GlobSt10@toc@ha +; CHECK-P10-BE-NEXT: addi r3, r3, GlobSt10@toc@l +; CHECK-P10-BE-NEXT: stxsd v2, 0(r3) +; CHECK-P10-BE-NEXT: blr +; +; CHECK-P9-LABEL: store_double_f64_to_uint: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: xscvdpuxds v2, f1 +; CHECK-P9-NEXT: addis r3, r2, GlobSt10@toc@ha +; CHECK-P9-NEXT: addi r3, r3, GlobSt10@toc@l +; CHECK-P9-NEXT: stxsd v2, 0(r3) +; CHECK-P9-NEXT: blr +; +; CHECK-P8-LABEL: store_double_f64_to_uint: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: xscvdpuxds f0, f1 +; CHECK-P8-NEXT: addis r3, r2, GlobSt10@toc@ha +; CHECK-P8-NEXT: addi r3, r3, GlobSt10@toc@l +; CHECK-P8-NEXT: stxsdx f0, 0, r3 +; CHECK-P8-NEXT: blr +entry: + %conv = fptoui double %str to i64 + store i64 %conv, i64* bitcast ([20 x double]* @GlobSt10 to i64*), align 8 + ret void +} + +; Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly +define dso_local void @store_double_f64_to_sint(double %str) local_unnamed_addr #0 { +; CHECK-P10-LE-LABEL: store_double_f64_to_sint: +; CHECK-P10-LE: # %bb.0: # %entry +; CHECK-P10-LE-NEXT: xscvdpsxds v2, f1 +; CHECK-P10-LE-NEXT: pstxsd v2, GlobSt10@PCREL(0), 1 +; CHECK-P10-LE-NEXT: blr +; +; CHECK-P10-BE-LABEL: store_double_f64_to_sint: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: 
xscvdpsxds v2, f1 +; CHECK-P10-BE-NEXT: addis r3, r2, GlobSt10@toc@ha +; CHECK-P10-BE-NEXT: addi r3, r3, GlobSt10@toc@l +; CHECK-P10-BE-NEXT: stxsd v2, 0(r3) +; CHECK-P10-BE-NEXT: blr +; +; CHECK-P9-LABEL: store_double_f64_to_sint: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: xscvdpsxds v2, f1 +; CHECK-P9-NEXT: addis r3, r2, GlobSt10@toc@ha +; CHECK-P9-NEXT: addi r3, r3, GlobSt10@toc@l +; CHECK-P9-NEXT: stxsd v2, 0(r3) +; CHECK-P9-NEXT: blr +; +; CHECK-P8-LABEL: store_double_f64_to_sint: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: xscvdpsxds f0, f1 +; CHECK-P8-NEXT: addis r3, r2, GlobSt10@toc@ha +; CHECK-P8-NEXT: addi r3, r3, GlobSt10@toc@l +; CHECK-P8-NEXT: stxsdx f0, 0, r3 +; CHECK-P8-NEXT: blr +entry: + %conv = fptosi double %str to i64 + store i64 %conv, i64* bitcast ([20 x double]* @GlobSt10 to i64*), align 8 + ret void +} + +; Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly +define dso_local void @store_f128_to_uint(fp128 %str) local_unnamed_addr #0 { +; CHECK-P10-LE-LABEL: store_f128_to_uint: +; CHECK-P10-LE: # %bb.0: # %entry +; CHECK-P10-LE-NEXT: xscvqpudz v2, v2 +; CHECK-P10-LE-NEXT: pstxsd v2, GlobF128@PCREL(0), 1 +; CHECK-P10-LE-NEXT: blr +; +; CHECK-P10-BE-LABEL: store_f128_to_uint: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xscvqpudz v2, v2 +; CHECK-P10-BE-NEXT: addis r3, r2, GlobF128@toc@ha +; CHECK-P10-BE-NEXT: addi r3, r3, GlobF128@toc@l +; CHECK-P10-BE-NEXT: stxsd v2, 0(r3) +; CHECK-P10-BE-NEXT: blr +; +; CHECK-P9-LABEL: store_f128_to_uint: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: xscvqpudz v2, v2 +; CHECK-P9-NEXT: addis r3, r2, GlobF128@toc@ha +; CHECK-P9-NEXT: addi r3, r3, GlobF128@toc@l +; CHECK-P9-NEXT: stxsd v2, 0(r3) +; CHECK-P9-NEXT: blr +; +; CHECK-P8-LE-LABEL: store_f128_to_uint: +; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: mflr r0 +; CHECK-P8-LE-NEXT: std r0, 16(r1) +; CHECK-P8-LE-NEXT: stdu r1, -32(r1) +; CHECK-P8-LE-NEXT: .cfi_def_cfa_offset 32 +; CHECK-P8-LE-NEXT: 
.cfi_offset lr, 16 +; CHECK-P8-LE-NEXT: bl __fixunskfdi +; CHECK-P8-LE-NEXT: nop +; CHECK-P8-LE-NEXT: addis r4, r2, GlobF128@toc@ha +; CHECK-P8-LE-NEXT: std r3, GlobF128@toc@l(r4) +; CHECK-P8-LE-NEXT: addi r1, r1, 32 +; CHECK-P8-LE-NEXT: ld r0, 16(r1) +; CHECK-P8-LE-NEXT: mtlr r0 +; CHECK-P8-LE-NEXT: blr +; +; CHECK-P8-BE-LABEL: store_f128_to_uint: +; CHECK-P8-BE: # %bb.0: # %entry +; CHECK-P8-BE-NEXT: mflr r0 +; CHECK-P8-BE-NEXT: std r0, 16(r1) +; CHECK-P8-BE-NEXT: stdu r1, -112(r1) +; CHECK-P8-BE-NEXT: .cfi_def_cfa_offset 112 +; CHECK-P8-BE-NEXT: .cfi_offset lr, 16 +; CHECK-P8-BE-NEXT: bl __fixunskfdi +; CHECK-P8-BE-NEXT: nop +; CHECK-P8-BE-NEXT: addis r4, r2, GlobF128@toc@ha +; CHECK-P8-BE-NEXT: std r3, GlobF128@toc@l(r4) +; CHECK-P8-BE-NEXT: addi r1, r1, 112 +; CHECK-P8-BE-NEXT: ld r0, 16(r1) +; CHECK-P8-BE-NEXT: mtlr r0 +; CHECK-P8-BE-NEXT: blr +entry: + %conv = fptoui fp128 %str to i64 + store i64 %conv, i64* bitcast ([20 x fp128]* @GlobF128 to i64*), align 16 + ret void +} + +; Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly +define dso_local void @store_f128_to_sint(fp128 %str) local_unnamed_addr #0 { +; CHECK-P10-LE-LABEL: store_f128_to_sint: +; CHECK-P10-LE: # %bb.0: # %entry +; CHECK-P10-LE-NEXT: xscvqpsdz v2, v2 +; CHECK-P10-LE-NEXT: pstxsd v2, GlobF128@PCREL(0), 1 +; CHECK-P10-LE-NEXT: blr +; +; CHECK-P10-BE-LABEL: store_f128_to_sint: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xscvqpsdz v2, v2 +; CHECK-P10-BE-NEXT: addis r3, r2, GlobF128@toc@ha +; CHECK-P10-BE-NEXT: addi r3, r3, GlobF128@toc@l +; CHECK-P10-BE-NEXT: stxsd v2, 0(r3) +; CHECK-P10-BE-NEXT: blr +; +; CHECK-P9-LABEL: store_f128_to_sint: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: xscvqpsdz v2, v2 +; CHECK-P9-NEXT: addis r3, r2, GlobF128@toc@ha +; CHECK-P9-NEXT: addi r3, r3, GlobF128@toc@l +; CHECK-P9-NEXT: stxsd v2, 0(r3) +; CHECK-P9-NEXT: blr +; +; CHECK-P8-LE-LABEL: store_f128_to_sint: +; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: mflr r0 
+; CHECK-P8-LE-NEXT: std r0, 16(r1) +; CHECK-P8-LE-NEXT: stdu r1, -32(r1) +; CHECK-P8-LE-NEXT: .cfi_def_cfa_offset 32 +; CHECK-P8-LE-NEXT: .cfi_offset lr, 16 +; CHECK-P8-LE-NEXT: bl __fixkfdi +; CHECK-P8-LE-NEXT: nop +; CHECK-P8-LE-NEXT: addis r4, r2, GlobF128@toc@ha +; CHECK-P8-LE-NEXT: std r3, GlobF128@toc@l(r4) +; CHECK-P8-LE-NEXT: addi r1, r1, 32 +; CHECK-P8-LE-NEXT: ld r0, 16(r1) +; CHECK-P8-LE-NEXT: mtlr r0 +; CHECK-P8-LE-NEXT: blr +; +; CHECK-P8-BE-LABEL: store_f128_to_sint: +; CHECK-P8-BE: # %bb.0: # %entry +; CHECK-P8-BE-NEXT: mflr r0 +; CHECK-P8-BE-NEXT: std r0, 16(r1) +; CHECK-P8-BE-NEXT: stdu r1, -112(r1) +; CHECK-P8-BE-NEXT: .cfi_def_cfa_offset 112 +; CHECK-P8-BE-NEXT: .cfi_offset lr, 16 +; CHECK-P8-BE-NEXT: bl __fixkfdi +; CHECK-P8-BE-NEXT: nop +; CHECK-P8-BE-NEXT: addis r4, r2, GlobF128@toc@ha +; CHECK-P8-BE-NEXT: std r3, GlobF128@toc@l(r4) +; CHECK-P8-BE-NEXT: addi r1, r1, 112 +; CHECK-P8-BE-NEXT: ld r0, 16(r1) +; CHECK-P8-BE-NEXT: mtlr r0 +; CHECK-P8-BE-NEXT: blr +entry: + %conv = fptosi fp128 %str to i64 + store i64 %conv, i64* bitcast ([20 x fp128]* @GlobF128 to i64*), align 16 + ret void +}