diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -261,6 +261,13 @@ None) == PPC::AM_PCRel; } + /// SelectPDForm - Returns true if address N can be represented by Prefixed + /// DForm addressing mode (a base register, plus a signed 34-bit immediate). + bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, + None) == PPC::AM_PrefixDForm; + } + /// SelectXForm - Returns true if address N can be represented by the /// addressing mode of XForm instructions (an indexed [r+r] operation). bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -703,6 +703,7 @@ AM_DForm, AM_DSForm, AM_DQForm, + AM_PrefixDForm, AM_XForm, AM_PCRel }; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1458,6 +1458,8 @@ PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9, PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector256 | PPC::MOF_SubtargetP10, }; + AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 | + PPC::MOF_SubtargetP10}; AddrModesMap[PPC::AM_PCRel] = {PPC::MOF_PCRel | PPC::MOF_SubtargetP10}; } @@ -16494,6 +16496,9 @@ for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm)) if ((Flags & FlagSet) == FlagSet) return PPC::AM_DQForm; + for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm)) + if ((Flags & FlagSet) == FlagSet) + return PPC::AM_PrefixDForm; for (auto FlagSet : AddrModesMap.at(PPC::AM_PCRel)) if ((Flags & FlagSet) == FlagSet) return PPC::AM_PCRel; @@ 
-16593,6 +16598,19 @@ FlagSet |= PPC::MOF_PCRel; return FlagSet; } + + // If the node is the paired load/store intrinsics, compute flags for + // address computation and return early. + unsigned ParentOp = Parent->getOpcode(); + if ((ParentOp == ISD::INTRINSIC_W_CHAIN) || + (ParentOp == ISD::INTRINSIC_VOID)) { + unsigned ID = cast<ConstantSDNode>(Parent->getOperand(1))->getZExtValue(); + SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp) ? + Parent->getOperand(2) : Parent->getOperand(3); + computeFlagsForAddressComputation(IntrinOp); + FlagSet |= PPC::MOF_Vector256; + return FlagSet; + } } // Mark this as something we don't want to handle here if it is atomic @@ -16791,6 +16809,26 @@ Base = N; break; } + case PPC::AM_PrefixDForm: { + assert((N.getValueType() == MVT::i64) && + "Prefixed Instructions only available on 64-bit targets!"); + int64_t Imm34 = 0; + unsigned Opcode = N.getOpcode(); + if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) && + (isIntS34Immediate(N.getOperand(1), Imm34))) { + // N is an Add/OR Node, and its operand is a 34-bit signed immediate. + Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType()); + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) + Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); + else + Base = N.getOperand(0); + } else if (isIntS34Immediate(N, Imm34)) { + // The address is a 34-bit signed immediate. 
+ Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType()); + Base = DAG.getRegister(PPC::ZERO8, N.getValueType()); + } + break; + } case PPC::AM_PCRel: { Disp = N; break; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1102,6 +1102,7 @@ def XForm : ComplexPattern<iPTR, 2, "SelectXForm", [], []>; def ForceXForm : ComplexPattern<iPTR, 2, "SelectForceXForm", [], []>; def PCRelForm : ComplexPattern<iPTR, 2, "SelectPCRelForm", [], []>; +def PDForm : ComplexPattern<iPTR, 2, "SelectPDForm", [], []>; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -1659,18 +1659,18 @@ let Predicates = [PairedVectorMemops] in { // Intrinsics for Paired Vector Loads. - def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX16:$src)), (LXVP memrix16:$src)>; - def : Pat<(v256i1 (int_ppc_vsx_lxvp xaddrX16:$src)), (LXVPX xaddrX16:$src)>; + def : Pat<(v256i1 (int_ppc_vsx_lxvp DQForm:$src)), (LXVP memrix16:$src)>; + def : Pat<(v256i1 (int_ppc_vsx_lxvp XForm:$src)), (LXVPX XForm:$src)>; let Predicates = [PairedVectorMemops, PrefixInstrs] in { - def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX34:$src)), (PLXVP memri34:$src)>; + def : Pat<(v256i1 (int_ppc_vsx_lxvp PDForm:$src)), (PLXVP memri34:$src)>; } // Intrinsics for Paired Vector Stores. 
- def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX16:$dst), + def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, DQForm:$dst), (STXVP $XSp, memrix16:$dst)>; - def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, xaddrX16:$dst), - (STXVPX $XSp, xaddrX16:$dst)>; + def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, XForm:$dst), + (STXVPX $XSp, XForm:$dst)>; let Predicates = [PairedVectorMemops, PrefixInstrs] in { - def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX34:$dst), + def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, PDForm:$dst), (PSTXVP $XSp, memri34:$dst)>; } } @@ -2633,6 +2633,43 @@ // nand(A, nand(B, C)) def : xxevalPattern<(or (vnot_ppc v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), !sub(255, 14)>; + + // Anonymous patterns to select prefixed VSX loads and stores. + // Load / Store f128 + def : Pat<(f128 (load PDForm:$src)), + (COPY_TO_REGCLASS (PLXV memri34:$src), VRRC)>; + def : Pat<(store f128:$XS, PDForm:$dst), + (PSTXV (COPY_TO_REGCLASS $XS, VSRC), memri34:$dst)>; + + // Load / Store v4i32 + def : Pat<(v4i32 (load PDForm:$src)), (PLXV memri34:$src)>; + def : Pat<(store v4i32:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>; + + // Load / Store v2i64 + def : Pat<(v2i64 (load PDForm:$src)), (PLXV memri34:$src)>; + def : Pat<(store v2i64:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>; + + // Load / Store v4f32 + def : Pat<(v4f32 (load PDForm:$src)), (PLXV memri34:$src)>; + def : Pat<(store v4f32:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>; + + // Load / Store v2f64 + def : Pat<(v2f64 (load PDForm:$src)), (PLXV memri34:$src)>; + def : Pat<(store v2f64:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>; + + // Cases For PPCstore_scal_int_from_vsr + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), (PDForm:$dst), 8), + (PSTXSD (XSCVDPUXDS f64:$src), memri34:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), (PDForm:$dst), 8), + (PSTXSD (XSCVDPSXDS f64:$src), memri34:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 
(PPCcv_fp_to_uint_in_vsr f128:$src)), (PDForm:$dst), 8), + (PSTXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), memri34:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), (PDForm:$dst), 8), + (PSTXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), memri34:$dst)>; } let Predicates = [PrefixInstrs] in { @@ -2654,5 +2691,58 @@ (XXBLENDVW $A, $B, $C)>; def : Pat<(int_ppc_vsx_xxblendvd v2i64:$A, v2i64:$B, v2i64:$C), (XXBLENDVD $A, $B, $C)>; + + // Anonymous patterns to select prefixed loads and stores. + // Load i32 + def : Pat<(i32 (extloadi8 PDForm:$src)), (PLBZ memri34:$src)>; + def : Pat<(i32 (zextloadi8 PDForm:$src)), (PLBZ memri34:$src)>; + def : Pat<(i32 (extloadi16 PDForm:$src)), (PLHZ memri34:$src)>; + def : Pat<(i32 (zextloadi16 PDForm:$src)), (PLHZ memri34:$src)>; + def : Pat<(i32 (sextloadi16 PDForm:$src)), (PLHA memri34:$src)>; + def : Pat<(i32 (load PDForm:$src)), (PLWZ memri34:$src)>; + + // Store i32 + def : Pat<(truncstorei8 i32:$rS, PDForm:$dst), (PSTB gprc:$rS, memri34:$dst)>; + def : Pat<(truncstorei16 i32:$rS, PDForm:$dst), (PSTH gprc:$rS, memri34:$dst)>; + def : Pat<(store i32:$rS, PDForm:$dst), (PSTW gprc:$rS, memri34:$dst)>; + + // Load i64 + def : Pat<(i64 (extloadi8 PDForm:$src)), (PLBZ8 memri34:$src)>; + def : Pat<(i64 (zextloadi8 PDForm:$src)), (PLBZ8 memri34:$src)>; + def : Pat<(i64 (extloadi16 PDForm:$src)), (PLHZ8 memri34:$src)>; + def : Pat<(i64 (zextloadi16 PDForm:$src)), (PLHZ8 memri34:$src)>; + def : Pat<(i64 (sextloadi16 PDForm:$src)), (PLHA8 memri34:$src)>; + def : Pat<(i64 (extloadi32 PDForm:$src)), (PLWZ8 memri34:$src)>; + def : Pat<(i64 (zextloadi32 PDForm:$src)), (PLWZ8 memri34:$src)>; + def : Pat<(i64 (sextloadi32 PDForm:$src)), (PLWA8 memri34:$src)>; + def : Pat<(i64 (load PDForm:$src)), (PLD memri34:$src)>; + + // Store i64 + def : Pat<(truncstorei8 i64:$rS, PDForm:$dst), (PSTB8 g8rc:$rS, memri34:$dst)>; + def : Pat<(truncstorei16 i64:$rS, PDForm:$dst), (PSTH8 g8rc:$rS, 
memri34:$dst)>; + def : Pat<(truncstorei32 i64:$rS, PDForm:$dst), (PSTW8 g8rc:$rS, memri34:$dst)>; + def : Pat<(store i64:$rS, PDForm:$dst), (PSTD g8rc:$rS, memri34:$dst)>; + + // Load / Store f32 + def : Pat<(f32 (load PDForm:$src)), (PLFS memri34:$src)>; + def : Pat<(store f32:$FRS, PDForm:$dst), (PSTFS $FRS, memri34:$dst)>; + + // Load / Store f64 + def : Pat<(f64 (extloadf32 (PDForm:$src))), + (COPY_TO_REGCLASS (PLFS memri34:$src), VSFRC)>; + def : Pat<(f64 (load PDForm:$src)), (PLFD memri34:$src)>; + def : Pat<(store f64:$FRS, PDForm:$dst), (PSTFD $FRS, memri34:$dst)>; + + // Atomic Load + def : Pat<(atomic_load_8 PDForm:$src), (PLBZ memri34:$src)>; + def : Pat<(atomic_load_16 PDForm:$src), (PLHZ memri34:$src)>; + def : Pat<(atomic_load_32 PDForm:$src), (PLWZ memri34:$src)>; + def : Pat<(atomic_load_64 PDForm:$src), (PLD memri34:$src)>; + + // Atomic Store + def : Pat<(atomic_store_8 PDForm:$dst, i32:$RS), (PSTB $RS, memri34:$dst)>; + def : Pat<(atomic_store_16 PDForm:$dst, i32:$RS), (PSTH $RS, memri34:$dst)>; + def : Pat<(atomic_store_32 PDForm:$dst, i32:$RS), (PSTW $RS, memri34:$dst)>; + def : Pat<(atomic_store_64 PDForm:$dst, i64:$RS), (PSTD $RS, memri34:$dst)>; } diff --git a/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll b/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll @@ -313,38 +313,28 @@ } define void @test_ldst_7(<256 x i1>* %vpp, <256 x i1>* %vp2) { -; FIXME: A prefixed load (plxvp) is expected here as the offset in this -; test case is a constant that fits within 34-bits. 
; CHECK-LABEL: test_ldst_7: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: li r5, 0 -; CHECK-NEXT: ori r5, r5, 32799 -; CHECK-NEXT: lxvpx vsp0, r3, r5 -; CHECK-NEXT: stxvpx vsp0, r4, r5 +; CHECK-NEXT: plxvp vsp0, 32799(r3), 0 +; CHECK-NEXT: pstxvp vsp0, 32799(r4), 0 ; CHECK-NEXT: blr ; ; CHECK-NOMMA-LABEL: test_ldst_7: ; CHECK-NOMMA: # %bb.0: # %entry -; CHECK-NOMMA-NEXT: li r5, 0 -; CHECK-NOMMA-NEXT: ori r5, r5, 32799 -; CHECK-NOMMA-NEXT: lxvpx vsp0, r3, r5 -; CHECK-NOMMA-NEXT: stxvpx vsp0, r4, r5 +; CHECK-NOMMA-NEXT: plxvp vsp0, 32799(r3), 0 +; CHECK-NOMMA-NEXT: pstxvp vsp0, 32799(r4), 0 ; CHECK-NOMMA-NEXT: blr ; ; CHECK-BE-LABEL: test_ldst_7: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: li r5, 0 -; CHECK-BE-NEXT: ori r5, r5, 32799 -; CHECK-BE-NEXT: lxvpx vsp0, r3, r5 -; CHECK-BE-NEXT: stxvpx vsp0, r4, r5 +; CHECK-BE-NEXT: plxvp vsp0, 32799(r3), 0 +; CHECK-BE-NEXT: pstxvp vsp0, 32799(r4), 0 ; CHECK-BE-NEXT: blr ; ; CHECK-BE-NOMMA-LABEL: test_ldst_7: ; CHECK-BE-NOMMA: # %bb.0: # %entry -; CHECK-BE-NOMMA-NEXT: li r5, 0 -; CHECK-BE-NOMMA-NEXT: ori r5, r5, 32799 -; CHECK-BE-NOMMA-NEXT: lxvpx vsp0, r3, r5 -; CHECK-BE-NOMMA-NEXT: stxvpx vsp0, r4, r5 +; CHECK-BE-NOMMA-NEXT: plxvp vsp0, 32799(r3), 0 +; CHECK-BE-NOMMA-NEXT: pstxvp vsp0, 32799(r4), 0 ; CHECK-BE-NOMMA-NEXT: blr entry: %0 = bitcast <256 x i1>* %vpp to i8* diff --git a/llvm/test/CodeGen/PowerPC/prefixed-ld-st-atomics.ll b/llvm/test/CodeGen/PowerPC/prefixed-ld-st-atomics.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/prefixed-ld-st-atomics.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O3 \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O3 \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s | FileCheck %s + +; Atomic 
Loads and Stores +define zeroext i8 @atomic_load_i8(i8* nocapture readonly %ptr) { +; CHECK-LABEL: atomic_load_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plbz r3, 4294967297(r3), 0 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = load atomic i8, i8* %add.ptr monotonic, align 1 + ret i8 %0 +} + +define signext i16 @atomic_load_i16(i8* nocapture readonly %ptr) { +; CHECK-LABEL: atomic_load_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plhz r3, 4294967297(r3), 0 +; CHECK-NEXT: extsh r3, r3 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i16* + %1 = load atomic i16, i16* %0 monotonic, align 2 + ret i16 %1 +} + +define signext i32 @atomic_load_i32(i8* nocapture readonly %ptr) { +; CHECK-LABEL: atomic_load_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plwz r3, 4294967297(r3), 0 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i32* + %1 = load atomic i32, i32* %0 monotonic, align 4 + ret i32 %1 +} + +define i64 @atomic_load_i64(i8* nocapture readonly %ptr) { +; CHECK-LABEL: atomic_load_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i64* + %1 = load atomic i64, i64* %0 monotonic, align 8 + ret i64 %1 +} + +define void @atomic_store_i8_i32(i8* nocapture %ptr, i8 zeroext %str) { +; CHECK-LABEL: atomic_store_i8_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstb r4, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + store atomic i8 %str, i8* %add.ptr monotonic, align 1 + ret void +} + +define void @atomic_store_i16_i32(i8* nocapture %ptr, i16 signext %str) { +; CHECK-LABEL: atomic_store_i16_i32: +; CHECK: 
# %bb.0: # %entry +; CHECK-NEXT: psth r4, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i16* + store atomic i16 %str, i16* %0 monotonic, align 2 + ret void +} + +define void @atomic_store_i32_i32(i8* nocapture %ptr, i32 signext %str) { +; CHECK-LABEL: atomic_store_i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstw r4, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i32* + store atomic i32 %str, i32* %0 monotonic, align 4 + ret void +} + +define void @atomic_store_i64(i8* nocapture %ptr, i64 %str) { +; CHECK-LABEL: atomic_store_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstd r4, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i64* + store atomic i64 %str, i64* %0 monotonic, align 8 + ret void +} + diff --git a/llvm/test/CodeGen/PowerPC/prefixed-ld-st-constants.ll b/llvm/test/CodeGen/PowerPC/prefixed-ld-st-constants.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/prefixed-ld-st-constants.ll @@ -0,0 +1,490 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O3 \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O3 \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s | FileCheck %s + +; i32 tests: loading, storing a constant address +define void @extload_i8_i32_cst(i8* nocapture %St) { +; CHECK-LABEL: extload_i8_i32_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plbz r4, 4294967297(0), 0 +; CHECK-NEXT: pstb r4, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %0 = load i8, i8* inttoptr (i64 4294967297 to i8*), align 1 
+ %add.ptr = getelementptr inbounds i8, i8* %St, i64 4294967297 + store i8 %0, i8* %add.ptr, align 1 + ret void +} + +define void @store_i8_i32_cst(i8* nocapture readonly %Ld) { +; CHECK-LABEL: store_i8_i32_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plbz r3, 4294967297(r3), 0 +; CHECK-NEXT: pstb r3, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %Ld, i64 4294967297 + %0 = load i8, i8* %add.ptr, align 1 + store i8 %0, i8* inttoptr (i64 4294967297 to i8*), align 1 + ret void +} + +define void @zextload_i8_i32_cst(i32* nocapture %St) { +; CHECK-LABEL: zextload_i8_i32_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plbz r4, 4294967297(0), 0 +; CHECK-NEXT: pstw r4, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %0 = load i8, i8* inttoptr (i64 4294967297 to i8*), align 1 + %conv = zext i8 %0 to i32 + %1 = bitcast i32* %St to i8* + %add.ptr = getelementptr inbounds i8, i8* %1, i64 4294967297 + %2 = bitcast i8* %add.ptr to i32* + store i32 %conv, i32* %2, align 4 + ret void +} + +define void @extload_i16_i32_cst(i16* nocapture %St) { +; CHECK-LABEL: extload_i16_i32_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plhz r4, 4294967297(0), 0 +; CHECK-NEXT: psth r4, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %0 = load i16, i16* inttoptr (i64 4294967297 to i16*), align 2 + %1 = bitcast i16* %St to i8* + %add.ptr = getelementptr inbounds i8, i8* %1, i64 4294967297 + %2 = bitcast i8* %add.ptr to i16* + store i16 %0, i16* %2, align 2 + ret void +} + +define void @store_i16_i32_cst(i16* nocapture readonly %Ld) { +; CHECK-LABEL: store_i16_i32_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plhz r3, 4294967297(r3), 0 +; CHECK-NEXT: psth r3, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %0 = bitcast i16* %Ld to i8* + %add.ptr = getelementptr inbounds i8, i8* %0, i64 4294967297 + %1 = bitcast i8* %add.ptr to i16* + %2 = load i16, i16* %1, align 2 + store i16 %2, i16* inttoptr (i64 4294967297 to i16*), align 2 + ret void +} + +define void 
@zextload_i16_i32_cst(i32* nocapture %St) { +; CHECK-LABEL: zextload_i16_i32_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plhz r4, 4294967297(0), 0 +; CHECK-NEXT: pstw r4, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %0 = load i16, i16* inttoptr (i64 4294967297 to i16*), align 2 + %conv = zext i16 %0 to i32 + %1 = bitcast i32* %St to i8* + %add.ptr = getelementptr inbounds i8, i8* %1, i64 4294967297 + %2 = bitcast i8* %add.ptr to i32* + store i32 %conv, i32* %2, align 4 + ret void +} + +define void @sextload_i16_i32_cst(i32* nocapture %St) { +; CHECK-LABEL: sextload_i16_i32_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plha r4, 4294967297(0), 0 +; CHECK-NEXT: pstw r4, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %0 = load i16, i16* inttoptr (i64 4294967297 to i16*), align 2 + %conv = sext i16 %0 to i32 + %1 = bitcast i32* %St to i8* + %add.ptr = getelementptr inbounds i8, i8* %1, i64 4294967297 + %2 = bitcast i8* %add.ptr to i32* + store i32 %conv, i32* %2, align 4 + ret void +} + +define void @load_i32_cst(i32* nocapture %St) { +; CHECK-LABEL: load_i32_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plwz r4, 4294967297(0), 0 +; CHECK-NEXT: pstw r4, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* inttoptr (i64 4294967297 to i32*), align 4 + %1 = bitcast i32* %St to i8* + %add.ptr = getelementptr inbounds i8, i8* %1, i64 4294967297 + %2 = bitcast i8* %add.ptr to i32* + store i32 %0, i32* %2, align 4 + ret void +} + +define void @store_i32_cst(i32* nocapture readonly %Ld) { +; CHECK-LABEL: store_i32_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plwz r3, 4294967297(r3), 0 +; CHECK-NEXT: pstw r3, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %0 = bitcast i32* %Ld to i8* + %add.ptr = getelementptr inbounds i8, i8* %0, i64 4294967297 + %1 = bitcast i8* %add.ptr to i32* + %2 = load i32, i32* %1, align 4 + store i32 %2, i32* inttoptr (i64 4294967297 to i32*), align 4 + ret void +} + +; i64 tests: loading a constant address +define signext i8 
@extload_i8_i64_cst() { +; CHECK-LABEL: extload_i8_i64_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plbz r3, 4294967297(0), 0 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +entry: + %0 = load i8, i8* inttoptr (i64 4294967297 to i8*), align 1 + ret i8 %0 +} + +define zeroext i8 @zextload_i8_i64_cst() { +; CHECK-LABEL: zextload_i8_i64_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plbz r3, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %0 = load i8, i8* inttoptr (i64 4294967297 to i8*), align 1 + ret i8 %0 +} + +define i16 @extload_i16_i64_cst() { +; CHECK-LABEL: extload_i16_i64_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plhz r3, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %0 = load i16, i16* inttoptr (i64 4294967297 to i16*), align 2 + ret i16 %0 +} + +define zeroext i16 @zextload_i16_i64_cst() { +; CHECK-LABEL: zextload_i16_i64_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plhz r3, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %0 = load i16, i16* inttoptr (i64 4294967297 to i16*), align 2 + ret i16 %0 +} + +define signext i16 @sextload_i16_i64_cst() { +; CHECK-LABEL: sextload_i16_i64_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plha r3, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %0 = load i16, i16* inttoptr (i64 4294967297 to i16*), align 2 + ret i16 %0 +} + +define i32 @extload_i32_i64_cst() { +; CHECK-LABEL: extload_i32_i64_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plwz r3, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* inttoptr (i64 4294967297 to i32*), align 4 + ret i32 %0 +} + +define zeroext i32 @zextload_i32_i64_cst() { +; CHECK-LABEL: zextload_i32_i64_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plwz r3, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* inttoptr (i64 4294967297 to i32*), align 4 + ret i32 %0 +} + +define signext i32 @sextload_i32_i64_cst() { +; CHECK-LABEL: sextload_i32_i64_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plwa r3, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %0 
= load i32, i32* inttoptr (i64 4294967297 to i32*), align 4 + ret i32 %0 +} + +define i64 @load_i64_cst() { +; CHECK-LABEL: load_i64_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %0 = load i64, i64* inttoptr (i64 4294967297 to i64*), align 8 + ret i64 %0 +} + +define void @store_i8_i64_cst(i8 zeroext %str) { +; CHECK-LABEL: store_i8_i64_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstb r3, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + store i8 %str, i8* inttoptr (i64 4294967297 to i8*), align 1 + ret void +} + +define void @store_i16_i64_cst(i16 zeroext %str) { +; CHECK-LABEL: store_i16_i64_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: psth r3, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + store i16 %str, i16* inttoptr (i64 4294967297 to i16*), align 2 + ret void +} + +define void @store_i32_i64_cst(i32 zeroext %str) { +; CHECK-LABEL: store_i32_i64_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstw r3, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + store i32 %str, i32* inttoptr (i64 4294967297 to i32*), align 4 + ret void +} + +define void @store_i64_cst(i64 %str) { +; CHECK-LABEL: store_i64_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstd r3, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + store i64 %str, i64* inttoptr (i64 4294967297 to i64*), align 8 + ret void +} + +; float, double, f128: loading a constant address +define float @load_float_cst() { +; CHECK-LABEL: load_float_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plfs f1, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %0 = load float, float* inttoptr (i64 4294967297 to float*), align 4 + ret float %0 +} + +define double @extload_float_cst() { +; CHECK-LABEL: extload_float_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lis r3, -32768 +; CHECK-NEXT: ori r3, r3, 0 +; CHECK-NEXT: rldicl r3, r3, 1, 31 +; CHECK-NEXT: plfs f1, r3(r3), 0 +; CHECK-NEXT: blr +entry: + %0 = load float, float* inttoptr (i64 4294967297 to float*), align 4 + %conv = 
fpext float %0 to double + ret double %conv +} + +define double @load_double_cst() { +; CHECK-LABEL: load_double_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plfd f1, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %0 = load double, double* inttoptr (i64 4294967297 to double*), align 8 + ret double %0 +} + +define void @store_float_cst(float %str) { +; CHECK-LABEL: store_float_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstfs f1, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + store float %str, float* inttoptr (i64 4294967297 to float*), align 4 + ret void +} + +define void @store_double_cst(double %str) { +; CHECK-LABEL: store_double_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstfd f1, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + store double %str, double* inttoptr (i64 4294967297 to double*), align 8 + ret void +} + +define fp128 @load_f128_cst() { +; CHECK-LABEL: load_f128_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %0 = load fp128, fp128* inttoptr (i64 4294967297 to fp128*), align 16 + ret fp128 %0 +} + +define void @store_f128_cst(fp128 %str) { +; CHECK-LABEL: store_f128_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + store fp128 %str, fp128* inttoptr (i64 4294967297 to fp128*), align 16 + ret void +} + +define void @store_double_fp_to_uint_cst(double %str) { +; CHECK-LABEL: store_double_fp_to_uint_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpuxds v2, f1 +; CHECK-NEXT: lis r3, -32768 +; CHECK-NEXT: ori r3, r3, 0 +; CHECK-NEXT: rldicl r3, r3, 1, 31 +; CHECK-NEXT: pstxsd v2, r3(r3), 0 +; CHECK-NEXT: blr +entry: + %conv = fptoui double %str to i64 + store i64 %conv, i64* inttoptr (i64 4294967297 to i64*), align 8 + ret void +} + +define void @store_double_fp_to_int_cst(double %str) { +; CHECK-LABEL: store_double_fp_to_int_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsxds v2, f1 +; CHECK-NEXT: lis r3, -32768 +; CHECK-NEXT: 
ori r3, r3, 0 +; CHECK-NEXT: rldicl r3, r3, 1, 31 +; CHECK-NEXT: pstxsd v2, r3(r3), 0 +; CHECK-NEXT: blr +entry: + %conv = fptosi double %str to i64 + store i64 %conv, i64* inttoptr (i64 4294967297 to i64*), align 8 + ret void +} + +define void @store_f128_fp_to_uint_cst(fp128 %str) { +; CHECK-LABEL: store_f128_fp_to_uint_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvqpudz v2, v2 +; CHECK-NEXT: lis r3, -32768 +; CHECK-NEXT: ori r3, r3, 0 +; CHECK-NEXT: rldicl r3, r3, 1, 31 +; CHECK-NEXT: pstxsd v2, r3(r3), 0 +; CHECK-NEXT: blr +entry: + %conv = fptoui fp128 %str to i64 + store i64 %conv, i64* inttoptr (i64 4294967297 to i64*), align 8 + ret void +} + +define void @store_f128_fp_to_int_cst(fp128 %str) { +; CHECK-LABEL: store_f128_fp_to_int_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvqpsdz v2, v2 +; CHECK-NEXT: lis r3, -32768 +; CHECK-NEXT: ori r3, r3, 0 +; CHECK-NEXT: rldicl r3, r3, 1, 31 +; CHECK-NEXT: pstxsd v2, r3(r3), 0 +; CHECK-NEXT: blr +entry: + %conv = fptosi fp128 %str to i64 + store i64 %conv, i64* inttoptr (i64 4294967297 to i64*), align 8 + ret void +} + +; v4i32, v2i64, v4f32, v2f64: loading a constant address +define <4 x i32> @load_v4i32_cst() { +; CHECK-LABEL: load_v4i32_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %0 = load <4 x i32>, <4 x i32>* inttoptr (i64 4294967297 to <4 x i32>*), align 16 + ret <4 x i32> %0 +} + +define void @store_v4i32_cst(<4 x i32> %str) { +; CHECK-LABEL: store_v4i32_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + store <4 x i32> %str, <4 x i32>* inttoptr (i64 4294967297 to <4 x i32>*), align 16 + ret void +} + +define <2 x i64> @load_v2i64_cst() { +; CHECK-LABEL: load_v2i64_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %0 = load <2 x i64>, <2 x i64>* inttoptr (i64 4294967297 to <2 x i64>*), align 16 + ret <2 x i64> %0 +} + +define void 
@store_v2i64_cst(<2 x i64> %str) { +; CHECK-LABEL: store_v2i64_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + store <2 x i64> %str, <2 x i64>* inttoptr (i64 4294967297 to <2 x i64>*), align 16 + ret void +} + +define <4 x float> @load_v4f32_cst() { +; CHECK-LABEL: load_v4f32_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %0 = load <4 x float>, <4 x float>* inttoptr (i64 4294967297 to <4 x float>*), align 16 + ret <4 x float> %0 +} + +define void @store_v4f32_cst(<4 x float> %str) { +; CHECK-LABEL: store_v4f32_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + store <4 x float> %str, <4 x float>* inttoptr (i64 4294967297 to <4 x float>*), align 16 + ret void +} + +define <2 x double> @load_v2f64_cst() { +; CHECK-LABEL: load_v2f64_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + %0 = load <2 x double>, <2 x double>* inttoptr (i64 4294967297 to <2 x double>*), align 16 + ret <2 x double> %0 +} + +define void @store_v2f64_cst(<2 x double> %str) { +; CHECK-LABEL: store_v2f64_cst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, 4294967297(0), 0 +; CHECK-NEXT: blr +entry: + store <2 x double> %str, <2 x double>* inttoptr (i64 4294967297 to <2 x double>*), align 16 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/prefixed-ld-st.ll b/llvm/test/CodeGen/PowerPC/prefixed-ld-st.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/prefixed-ld-st.ll @@ -0,0 +1,548 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O3 \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O3 \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names 
-ppc-vsr-nums-as-vr \ +; RUN: < %s | FileCheck %s + +; i32 tests: loading, storing a non-constant address +define void @extload_store_i8_i32_add(i8* nocapture readonly %Ld, i8* nocapture %St) { +; CHECK-LABEL: extload_store_i8_i32_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plbz r3, 4294967297(r3), 0 +; CHECK-NEXT: pstb r3, 4294967297(r4), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %Ld, i64 4294967297 + %0 = load i8, i8* %add.ptr, align 1 + %add.ptr1 = getelementptr inbounds i8, i8* %St, i64 4294967297 + store i8 %0, i8* %add.ptr1, align 1 + ret void +} + +define void @zextload_store_i8_i32_add(i8* nocapture readonly %Ld, i32* nocapture %St) { +; CHECK-LABEL: zextload_store_i8_i32_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plbz r3, 4294967297(r3), 0 +; CHECK-NEXT: pstw r3, 4294967297(r4), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %Ld, i64 4294967297 + %0 = load i8, i8* %add.ptr, align 1 + %conv = zext i8 %0 to i32 + %1 = bitcast i32* %St to i8* + %add.ptr1 = getelementptr inbounds i8, i8* %1, i64 4294967297 + %2 = bitcast i8* %add.ptr1 to i32* + store i32 %conv, i32* %2, align 4 + ret void +} + +define void @extload_store_i16_i32_add(i16* nocapture readonly %Ld, i16* nocapture %St) { +; CHECK-LABEL: extload_store_i16_i32_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plhz r3, 4294967297(r3), 0 +; CHECK-NEXT: psth r3, 4294967297(r4), 0 +; CHECK-NEXT: blr +entry: + %0 = bitcast i16* %Ld to i8* + %add.ptr = getelementptr inbounds i8, i8* %0, i64 4294967297 + %1 = bitcast i8* %add.ptr to i16* + %2 = load i16, i16* %1, align 2 + %3 = bitcast i16* %St to i8* + %add.ptr1 = getelementptr inbounds i8, i8* %3, i64 4294967297 + %4 = bitcast i8* %add.ptr1 to i16* + store i16 %2, i16* %4, align 2 + ret void +} + +define void @zextload_store_i16_i32_add(i16* nocapture readonly %Ld, i32* nocapture %St) { +; CHECK-LABEL: zextload_store_i16_i32_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plhz r3, 
4294967297(r3), 0 +; CHECK-NEXT: pstw r3, 4294967297(r4), 0 +; CHECK-NEXT: blr +entry: + %0 = bitcast i16* %Ld to i8* + %add.ptr = getelementptr inbounds i8, i8* %0, i64 4294967297 + %1 = bitcast i8* %add.ptr to i16* + %2 = load i16, i16* %1, align 2 + %conv = zext i16 %2 to i32 + %3 = bitcast i32* %St to i8* + %add.ptr1 = getelementptr inbounds i8, i8* %3, i64 4294967297 + %4 = bitcast i8* %add.ptr1 to i32* + store i32 %conv, i32* %4, align 4 + ret void +} + +define void @sextload_store_i16_i32_add(i16* nocapture readonly %Ld, i32* nocapture %St) { +; CHECK-LABEL: sextload_store_i16_i32_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plha r3, 4294967297(r3), 0 +; CHECK-NEXT: pstw r3, 4294967297(r4), 0 +; CHECK-NEXT: blr +entry: + %0 = bitcast i16* %Ld to i8* + %add.ptr = getelementptr inbounds i8, i8* %0, i64 4294967297 + %1 = bitcast i8* %add.ptr to i16* + %2 = load i16, i16* %1, align 2 + %conv = sext i16 %2 to i32 + %3 = bitcast i32* %St to i8* + %add.ptr1 = getelementptr inbounds i8, i8* %3, i64 4294967297 + %4 = bitcast i8* %add.ptr1 to i32* + store i32 %conv, i32* %4, align 4 + ret void +} + +define void @load_store_i32_add(i32* nocapture readonly %Ld, i32* nocapture %St) { +; CHECK-LABEL: load_store_i32_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plwz r3, 4294967297(r3), 0 +; CHECK-NEXT: pstw r3, 4294967297(r4), 0 +; CHECK-NEXT: blr +entry: + %0 = bitcast i32* %Ld to i8* + %add.ptr = getelementptr inbounds i8, i8* %0, i64 4294967297 + %1 = bitcast i8* %add.ptr to i32* + %2 = load i32, i32* %1, align 4 + %3 = bitcast i32* %St to i8* + %add.ptr1 = getelementptr inbounds i8, i8* %3, i64 4294967297 + %4 = bitcast i8* %add.ptr1 to i32* + store i32 %2, i32* %4, align 4 + ret void +} + +; i64 tests: loading, storing a non-constant address +define signext i8 @extload_i8_i64_add(i8* nocapture readonly %ptr) { +; CHECK-LABEL: extload_i8_i64_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plbz r3, 4294967297(r3), 0 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr 
+entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = load i8, i8* %add.ptr, align 1 + ret i8 %0 +} + +define zeroext i8 @zextload_i8_i64_add(i8* nocapture readonly %ptr) { +; CHECK-LABEL: zextload_i8_i64_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plbz r3, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = load i8, i8* %add.ptr, align 1 + ret i8 %0 +} + +define i16 @extload_i16_i64_add(i8* nocapture readonly %ptr) { +; CHECK-LABEL: extload_i16_i64_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plhz r3, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i16* + %1 = load i16, i16* %0, align 2 + ret i16 %1 +} + +define zeroext i16 @zextload_i16_i64_add(i8* nocapture readonly %ptr) { +; CHECK-LABEL: zextload_i16_i64_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plhz r3, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i16* + %1 = load i16, i16* %0, align 2 + ret i16 %1 +} + +define signext i16 @sextload_i16_i64_add(i8* nocapture readonly %ptr) { +; CHECK-LABEL: sextload_i16_i64_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plha r3, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i16* + %1 = load i16, i16* %0, align 2 + ret i16 %1 +} + +define i32 @extload_i32_i64_add(i8* nocapture readonly %ptr) { +; CHECK-LABEL: extload_i32_i64_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plwz r3, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i32* + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + +define zeroext i32 @zextload_i32_i64_add(i8* nocapture readonly %ptr) { +; CHECK-LABEL: zextload_i32_i64_add: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: plwz r3, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i32* + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + +define signext i32 @sextload_i32_i64_add(i8* nocapture readonly %ptr) { +; CHECK-LABEL: sextload_i32_i64_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plwa r3, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i32* + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + +define i64 @load_i64_add(i8* nocapture readonly %ptr) { +; CHECK-LABEL: load_i64_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i64* + %1 = load i64, i64* %0, align 8 + ret i64 %1 +} + +define void @store_i8_i64_add(i8* nocapture %ptr, i8 zeroext %str) { +; CHECK-LABEL: store_i8_i64_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstb r4, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + store i8 %str, i8* %add.ptr, align 1 + ret void +} + +define void @store_i16_i64_add(i8* nocapture %ptr, i16 zeroext %str) { +; CHECK-LABEL: store_i16_i64_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: psth r4, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i16* + store i16 %str, i16* %0, align 2 + ret void +} + +define void @store_i32_i64_add(i8* nocapture %ptr, i32 zeroext %str) { +; CHECK-LABEL: store_i32_i64_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstw r4, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i32* + store i32 %str, i32* %0, align 4 + ret void +} + +define void @store_i64_add(i8* 
nocapture %ptr, i64 %str) { +; CHECK-LABEL: store_i64_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstd r4, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i64* + store i64 %str, i64* %0, align 8 + ret void +} + +; float, double, f128: loading, storing a non-constant address +define float @load_float_add(i8* nocapture readonly %ptr) { +; CHECK-LABEL: load_float_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plfs f1, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to float* + %1 = load float, float* %0, align 4 + ret float %1 +} + +define double @extload_float_add(i8* nocapture readonly %ptr) { +; CHECK-LABEL: extload_float_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lis r4, -32768 +; CHECK-NEXT: ori r4, r4, 0 +; CHECK-NEXT: rldicl r4, r4, 1, 31 +; CHECK-NEXT: add r3, r3, r4 +; CHECK-NEXT: plfs f1, r3(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to float* + %1 = load float, float* %0, align 4 + %conv = fpext float %1 to double + ret double %conv +} + +define double @load_double_add(i8* nocapture readonly %ptr) { +; CHECK-LABEL: load_double_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plfd f1, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to double* + %1 = load double, double* %0, align 8 + ret double %1 +} + +define void @store_float_add(i8* nocapture %ptr, float %str) { +; CHECK-LABEL: store_float_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstfs f1, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to float* + store float %str, float* %0, align 4 + ret void +} + +define void @store_double_add(i8* nocapture %ptr, 
double %str) { +; CHECK-LABEL: store_double_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstfd f1, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to double* + store double %str, double* %0, align 8 + ret void +} + +define fp128 @load_f128_add(i8* nocapture readonly %ptr) { +; CHECK-LABEL: load_f128_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to fp128* + %1 = load fp128, fp128* %0, align 16 + ret fp128 %1 +} + +define void @store_f128_add(i8* nocapture %ptr, fp128 %str) { +; CHECK-LABEL: store_f128_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to fp128* + store fp128 %str, fp128* %0, align 16 + ret void +} + +define void @store_double_fp_to_uint_add(i8* nocapture %ptr, double %str) { +; CHECK-LABEL: store_double_fp_to_uint_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lis r4, -32768 +; CHECK-NEXT: xscvdpuxds v2, f1 +; CHECK-NEXT: ori r4, r4, 0 +; CHECK-NEXT: rldicl r4, r4, 1, 31 +; CHECK-NEXT: add r3, r3, r4 +; CHECK-NEXT: pstxsd v2, r3(r3), 0 +; CHECK-NEXT: blr +entry: + %conv = fptoui double %str to i64 + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i64* + store i64 %conv, i64* %0, align 8 + ret void +} + +define void @store_double_fp_to_int_add(i8* nocapture %ptr, double %str) { +; CHECK-LABEL: store_double_fp_to_int_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lis r4, -32768 +; CHECK-NEXT: xscvdpsxds v2, f1 +; CHECK-NEXT: ori r4, r4, 0 +; CHECK-NEXT: rldicl r4, r4, 1, 31 +; CHECK-NEXT: add r3, r3, r4 +; CHECK-NEXT: pstxsd v2, r3(r3), 0 +; CHECK-NEXT: blr +entry: + %conv = fptosi double %str to i64 + %add.ptr = 
getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i64* + store i64 %conv, i64* %0, align 8 + ret void +} + +define void @store_f128_fp_to_uint_add(i8* nocapture %ptr, fp128 %str) { +; CHECK-LABEL: store_f128_fp_to_uint_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvqpudz v2, v2 +; CHECK-NEXT: lis r4, -32768 +; CHECK-NEXT: ori r4, r4, 0 +; CHECK-NEXT: rldicl r4, r4, 1, 31 +; CHECK-NEXT: add r3, r3, r4 +; CHECK-NEXT: pstxsd v2, r3(r3), 0 +; CHECK-NEXT: blr +entry: + %conv = fptoui fp128 %str to i64 + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i64* + store i64 %conv, i64* %0, align 8 + ret void +} + +define void @store_f128_fp_to_int_add(i8* nocapture %ptr, fp128 %str) { +; CHECK-LABEL: store_f128_fp_to_int_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvqpsdz v2, v2 +; CHECK-NEXT: lis r4, -32768 +; CHECK-NEXT: ori r4, r4, 0 +; CHECK-NEXT: rldicl r4, r4, 1, 31 +; CHECK-NEXT: add r3, r3, r4 +; CHECK-NEXT: pstxsd v2, r3(r3), 0 +; CHECK-NEXT: blr +entry: + %conv = fptosi fp128 %str to i64 + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to i64* + store i64 %conv, i64* %0, align 8 + ret void +} + +; v4i32, v2i64, v4f32, v2f64: loading, storing a non-constant address +define <4 x i32> @load_v4i32_add(i8* nocapture readonly %ptr) { +; CHECK-LABEL: load_v4i32_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to <4 x i32>* + %1 = load <4 x i32>, <4 x i32>* %0, align 16 + ret <4 x i32> %1 +} + +define void @store_v4i32_add(i8* nocapture %ptr, <4 x i32> %str) { +; CHECK-LABEL: store_v4i32_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to <4 x i32>* + 
store <4 x i32> %str, <4 x i32>* %0, align 16 + ret void +} + +define <2 x i64> @load_v2i64_add(i8* nocapture readonly %ptr) { +; CHECK-LABEL: load_v2i64_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to <2 x i64>* + %1 = load <2 x i64>, <2 x i64>* %0, align 16 + ret <2 x i64> %1 +} + +define void @store_v2i64_add(i8* nocapture %ptr, <2 x i64> %str) { +; CHECK-LABEL: store_v2i64_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to <2 x i64>* + store <2 x i64> %str, <2 x i64>* %0, align 16 + ret void +} + +define <4 x float> @load_v4f32_add(i8* nocapture readonly %ptr) { +; CHECK-LABEL: load_v4f32_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to <4 x float>* + %1 = load <4 x float>, <4 x float>* %0, align 16 + ret <4 x float> %1 +} + +define void @store_v4f32_add(i8* nocapture %ptr, <4 x float> %str) { +; CHECK-LABEL: store_v4f32_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to <4 x float>* + store <4 x float> %str, <4 x float>* %0, align 16 + ret void +} + +define <2 x double> @load_v2f64_add(i8* nocapture readonly %ptr) { +; CHECK-LABEL: load_v2f64_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to <2 x double>* + %1 = load <2 x double>, <2 x double>* %0, align 16 + ret <2 x double> %1 +} + +define void @store_v2f64_add(i8* nocapture 
%ptr, <2 x double> %str) { +; CHECK-LABEL: store_v2f64_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, 4294967297(r3), 0 +; CHECK-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 4294967297 + %0 = bitcast i8* %add.ptr to <2 x double>* + store <2 x double> %str, <2 x double>* %0, align 16 + ret void +} + +; Testing prefixed load/store intrinsics +define void @ld_st_intrinsics(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: ld_st_intrinsics: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxvp vsp0, 32799(r3), 0 +; CHECK-NEXT: pstxvp vsp0, 32799(r4), 0 +; CHECK-NEXT: blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 32799 + %2 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 32799 + tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*) +declare void @llvm.ppc.vsx.stxvp(<256 x i1>, i8*) +