diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -573,6 +573,8 @@
 BUILTIN(__builtin_vsx_lxvll, "V4ivC*ULLi", "")
 BUILTIN(__builtin_vsx_stxvl, "vV4iv*ULLi", "")
 BUILTIN(__builtin_vsx_stxvll, "vV4iv*ULLi", "")
+BUILTIN(__builtin_vsx_ldrmb, "V16UcCc*Ii", "")
+BUILTIN(__builtin_vsx_strmb, "vCc*IiV16Uc", "")
 
 BUILTIN(__builtin_vsx_xvmaxdp, "V2dV2dV2d", "")
 BUILTIN(__builtin_vsx_xvmaxsp, "V4fV4fV4f", "")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -15069,6 +15069,143 @@
     llvm::Function *F = CGM.getIntrinsic(ID);
     return Builder.CreateCall(F, Ops, "");
   }
+  case PPC::BI__builtin_vsx_ldrmb: {
+    // Essentially boils down to performing an unaligned VMX load sequence so
+    // as to avoid crossing a page boundary and then shuffling the elements
+    // into the right side of the vector register.
+    int64_t NumBytes = cast<ConstantInt>(Ops[1])->getZExtValue();
+    llvm::Type *ResTy = ConvertType(E->getType());
+    bool IsLE = getTarget().isLittleEndian();
+
+    // If the user wants the entire vector, just load the entire vector.
+    if (NumBytes == 16) {
+      Value *BC = Builder.CreateBitCast(Ops[0], ResTy->getPointerTo());
+      Value *LD = Builder.CreateLoad(Address(BC, CharUnits::fromQuantity(1)));
+      if (!IsLE)
+        return LD;
+
+      // Reverse the bytes on LE.
+      SmallVector<int, 16> RevMask;
+      for (int Idx = 0; Idx < 16; Idx++)
+        RevMask.push_back(15 - Idx);
+      return Builder.CreateShuffleVector(LD, LD, RevMask);
+    }
+
+    llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
+    llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
+                                                : Intrinsic::ppc_altivec_lvsl);
+    llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
+    Value *HiMem = Builder.CreateGEP(
+        Int8Ty, Ops[0], ConstantInt::get(Ops[1]->getType(), NumBytes - 1));
+    Value *LoLd = Builder.CreateCall(Lvx, Ops[0], "ld.lo");
+    Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
+    Value *Mask1 = Builder.CreateCall(Lvs, Ops[0], "mask1");
+
+    Ops.clear();
+    Ops.push_back(IsLE ? HiLd : LoLd);
+    Ops.push_back(IsLE ? LoLd : HiLd);
+    Ops.push_back(Mask1);
+    Value *AllElts = Builder.CreateCall(Vperm, Ops, "shuffle1");
+    Constant *Zero =
+        llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
+    if (IsLE) {
+      SmallVector<int, 16> Consts;
+      for (int Idx = 0; Idx < 16; Idx++) {
+        int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
+                                            : 16 - (NumBytes - Idx);
+        Consts.push_back(Val);
+      }
+      return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
+                                         Zero, Consts);
+    }
+    SmallVector<Constant *, 16> Consts;
+    for (int Idx = 0; Idx < 16; Idx++)
+      Consts.push_back(Builder.getInt8(NumBytes + Idx));
+    Value *Mask2 = ConstantVector::get(Consts);
+    return Builder.CreateBitCast(
+        Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
+  }
+  case PPC::BI__builtin_vsx_strmb: {
+    int64_t NumBytes = cast<ConstantInt>(Ops[1])->getZExtValue();
+    bool IsLE = getTarget().isLittleEndian();
+    auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
+      // Storing the whole vector, simply store it on BE and reverse bytes and
+      // store on LE.
+      if (Width == 16) {
+        Value *BC =
+            Builder.CreateBitCast(Ops[0], Ops[2]->getType()->getPointerTo());
+        Value *StVec = Ops[2];
+        if (IsLE) {
+          SmallVector<int, 16> RevMask;
+          for (int Idx = 0; Idx < 16; Idx++)
+            RevMask.push_back(15 - Idx);
+          StVec = Builder.CreateShuffleVector(Ops[2], Ops[2], RevMask);
+        }
+        return Builder.CreateStore(StVec,
+                                   Address(BC, CharUnits::fromQuantity(1)));
+      }
+      auto *ConvTy = Int64Ty;
+      unsigned NumElts = 0;
+      switch (Width) {
+      default:
+        llvm_unreachable("width for stores must be a power of 2");
+      case 8:
+        ConvTy = Int64Ty;
+        NumElts = 2;
+        break;
+      case 4:
+        ConvTy = Int32Ty;
+        NumElts = 4;
+        break;
+      case 2:
+        ConvTy = Int16Ty;
+        NumElts = 8;
+        break;
+      case 1:
+        ConvTy = Int8Ty;
+        NumElts = 16;
+        break;
+      }
+      Value *Vec = Builder.CreateBitCast(
+          Ops[2], llvm::FixedVectorType::get(ConvTy, NumElts));
+      Value *Ptr = Builder.CreateGEP(Int8Ty, Ops[0],
+                                     ConstantInt::get(Int64Ty, Offset));
+      Value *PtrBC = Builder.CreateBitCast(Ptr, ConvTy->getPointerTo());
+      Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
+      if (IsLE && Width > 1) {
+        Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
+        Elt = Builder.CreateCall(F, Elt);
+      }
+      return Builder.CreateStore(Elt,
+                                 Address(PtrBC, CharUnits::fromQuantity(1)));
+    };
+    unsigned Stored = 0;
+    unsigned RemainingBytes = NumBytes;
+    Value *Result;
+    if (NumBytes == 16)
+      return StoreSubVec(16, 0, 0);
+    if (NumBytes >= 8) {
+      Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
+      RemainingBytes -= 8;
+      Stored += 8;
+    }
+    if (RemainingBytes >= 4) {
+      Result = StoreSubVec(4, NumBytes - Stored - 4,
+                           IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
+      RemainingBytes -= 4;
+      Stored += 4;
+    }
+    if (RemainingBytes >= 2) {
+      Result = StoreSubVec(2, NumBytes - Stored - 2,
+                           IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
+      RemainingBytes -= 2;
+      Stored += 2;
+    }
+    if (RemainingBytes)
+      Result =
+          StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
+    return Result;
+  }
   // Square root
   case PPC::BI__builtin_vsx_xvsqrtsp:
   case PPC::BI__builtin_vsx_xvsqrtdp: {
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -3143,6 +3143,15 @@
 #endif
 #endif
 
+#if defined(__POWER9_VECTOR__) && defined(__powerpc64__)
+#define __vec_ldrmb(PTR, CNT) vec_xl_len_r((const unsigned char *)(PTR), (CNT))
+#define __vec_strmb(PTR, CNT, VAL)                                             \
+  vec_xst_len_r((VAL), (unsigned char *)(PTR), (CNT))
+#else
+#define __vec_ldrmb __builtin_vsx_ldrmb
+#define __vec_strmb __builtin_vsx_strmb
+#endif
+
 /* vec_cpsgn */
 
 #ifdef __VSX__
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3440,6 +3440,11 @@
   case PPC::BI__builtin_ppc_lbarx:
     return SemaFeatureCheck(*this, TheCall, "isa-v207-instructions",
                             diag::err_ppc_builtin_only_on_arch, "8");
+  case PPC::BI__builtin_vsx_ldrmb:
+  case PPC::BI__builtin_vsx_strmb:
+    return SemaFeatureCheck(*this, TheCall, "isa-v207-instructions",
+                            diag::err_ppc_builtin_only_on_arch, "8") ||
+           SemaBuiltinConstantArgRange(TheCall, 1, 1, 16);
 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \
   case PPC::BI__builtin_##Name: \
     return SemaBuiltinPPCMMACall(TheCall, Types);
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-pwr8.c b/clang/test/CodeGen/builtins-ppc-xlcompat-pwr8.c
--- a/clang/test/CodeGen/builtins-ppc-xlcompat-pwr8.c
+++ b/clang/test/CodeGen/builtins-ppc-xlcompat-pwr8.c
@@ -44,3 +44,13 @@
   // CHECK-NOPWR8: error: this builtin is only valid on POWER8 or later CPUs
   return __builtin_ppc_stbcx(c_addr, c);
 }
+
+vector unsigned char test_ldrmb(char *ptr) {
+  // CHECK-NOPWR8: error: this builtin is only valid on POWER8 or later CPUs
+  return __builtin_vsx_ldrmb(ptr, 14);
+}
+
+void test_strmbb(char *ptr, vector unsigned char data) {
+  // CHECK-NOPWR8: error: this builtin is only valid on POWER8 or later CPUs
+  __builtin_vsx_strmb(ptr, 14, data);
+}