diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -1469,6 +1469,7 @@ BUILTIN(__builtin_char_memchr, "c*cC*iz", "n") BUILTIN(__builtin_dump_struct, "ivC*v*", "tn") BUILTIN(__builtin_preserve_access_index, "v.", "t") +BUILTIN(__builtin_preserve_bitfield_info, "LLUi.", "t") // Safestack builtins BUILTIN(__builtin___get_unsafe_stack_start, "v*", "Fn") diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -9938,6 +9938,9 @@ "%select{non-pointer|function pointer|void pointer}0 argument to " "'__builtin_launder' is not allowed">; +def err_preserve_bitfield_info_not_bitfield : Error< + "__builtin_preserve_bitfield_info argument not a bitfield access">; + def err_bit_cast_non_trivially_copyable : Error< "__builtin_bit_cast %select{source|destination}0 type must be trivially copyable">; def err_bit_cast_type_size_mismatch : Error< diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1882,6 +1882,24 @@ IsInPreservedAIRegion = false; return RValue::get(Res); } + case Builtin::BI__builtin_preserve_bitfield_info: { + if (!getDebugInfo()) { + CGM.Error(E->getExprLoc(), "using builtin_preserve_bitfield_info() without -g"); + return RValue::get(EmitLValue(E->getArg(0)).getBitFieldPointer()); + } + + // Nested builtin_preserve_bitfield_info() not supported + if (IsInPreservedBFRegion) { + CGM.Error(E->getExprLoc(), "nested builtin_preserve_bitfield_info() not supported"); + return RValue::get(EmitLValue(E->getArg(0)).getBitFieldPointer()); + } + + IsInPreservedBFRegion = true; + Value *Res = EmitLValue(E->getArg(0)).getBitFieldPointer(); + IsInPreservedBFRegion = false; + + 
return RValue::get(Builder.CreatePreserveBitFieldAccessIndex(Res)); + } case Builtin::BI__builtin_cimag: case Builtin::BI__builtin_cimagf: diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3990,9 +3990,19 @@ const CGBitFieldInfo &Info = RL.getBitFieldInfo(field); Address Addr = base.getAddress(); unsigned Idx = RL.getLLVMFieldNo(field); - if (Idx != 0) - // For structs, we GEP to the field that the record layout suggests. - Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); + if (!IsInPreservedBFRegion) { + if (Idx != 0) + // For structs, we GEP to the field that the record layout suggests. + Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); + } else { + const RecordDecl *rec = field->getParent(); + llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType( + getContext().getRecordType(rec), rec->getLocation()); + Addr = Builder.CreatePreserveStructAccessIndex(Addr, Idx, + getDebugInfoFIndex(rec, field->getFieldIndex()), + DbgInfo); + } + // Get the access type. llvm::Type *FieldIntTy = llvm::Type::getIntNTy(getLLVMContext(), Info.StorageSize); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -484,6 +484,10 @@ /// region. bool IsInPreservedAIRegion = false; + /// True if CodeGen currently emits code inside preserved bitfield index + /// region. 
+ bool IsInPreservedBFRegion = false; + const CodeGen::CGBlockInfo *BlockInfo = nullptr; llvm::Value *BlockPointer = nullptr; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1429,6 +1429,21 @@ if (SemaBuiltinPreserveAI(*this, TheCall)) return ExprError(); break; + case Builtin::BI__builtin_preserve_bitfield_info: { + if (checkArgCount(*this, TheCall, 1)) + return ExprError(); + + Expr *Arg = TheCall->getArg(0); + if (Arg->getType()->getAsPlaceholderType() || + Arg->IgnoreParens()->getObjectKind() != OK_BitField) { + Diag(Arg->getBeginLoc(), diag::err_preserve_bitfield_info_not_bitfield) + << Arg->getSourceRange(); + return ExprError(); + } + + TheCall->setType(Context.UnsignedLongLongTy); + break; + } case Builtin::BI__builtin_call_with_static_chain: if (SemaBuiltinCallWithStaticChain(*this, TheCall)) return ExprError(); diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -2572,6 +2572,20 @@ return Fn; } + Value *CreatePreserveBitFieldAccessIndex(Value *Base) { + assert(isa(Base->getType()) && + "Invalid Base ptr type for preserve.bitfield.access.index."); + auto *BaseType = Base->getType(); + + Module *M = BB->getParent()->getParent(); + Function *FnPreserveBitFieldAccessIndex = Intrinsic::getDeclaration( + M, Intrinsic::preserve_bitfield_access_index, {BaseType}); + + CallInst *Fn = CreateCall(FnPreserveBitFieldAccessIndex, {Base}); + + return Fn; + } + private: /// Helper function that creates an assume intrinsic call that /// represents an alignment assumption on the provided Ptr, Mask, Type diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1258,6 +1258,9 @@ llvm_i32_ty], [IntrNoMem, ImmArg<1>, ImmArg<2>]>; +def 
int_preserve_bitfield_access_index : Intrinsic<[llvm_i64_ty], + [llvm_anyptr_ty], + [IntrNoMem]>; //===----------------------------------------------------------------------===// // Target-specific intrinsics diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp --- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp +++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp @@ -50,6 +50,32 @@ // addr = preserve_struct_access_index(base, gep_index, di_index) // !llvm.preserve.access.index // +// Bitfield member access needs special attention. User cannot take the +// address of a bitfield access. To avoid generating loads which might +// have side effects with bpf programs, e.g., verifier may reject it, +// a new clang intrinsic (__builtin_preserve_bitfield_info()) is added +// to return bitfield member offset, signedness and member size, given +// a bitfield access. For example, +// struct s { int a; int b:3; int b2:4; } *arg; +// uint64_t __builtin_preserve_bitfield_info(arg->b) +// The application can then use member offset, signedness and member size +// to perform bit field extraction. +// +// A new IR intrinsic: +// uint64_t preserve_bitfield_access_index(base) +// is introduced. +// The above __builtin_preserve_bitfield_info(arg->b) will generate +// two preserve_*_access_index() calls like +// addr = preserve_struct_access_index(base, 1, 1) !struct s +// uint64_t preserve_bitfield_access_index(addr) +// The above two IR intrinsics will be eventually replaced with +// a relocatable insn: +// bitfield_info = 32ULL << 16 /* member offset in bits */ | +// 1 << 15 /* signedness */ | +// 3 /* member size in bits */ +// and bitfield_info can be changed by bpf loader based on the +// types on the host. 
+// //===----------------------------------------------------------------------===// #include "BPF.h" @@ -95,14 +121,15 @@ BPFPreserveArrayAI = 1, BPFPreserveUnionAI = 2, BPFPreserveStructAI = 3, + BPFPreserveBitFieldAI = 4, }; std::map GEPGlobals; // A map to link preserve_*_access_index instrinsic calls. std::map> AIChain; // A map to hold all the base preserve_*_access_index instrinsic calls. - // The base call is not an input of any other preserve_*_access_index - // intrinsics. + // The base call is an input of something else other than + // preserve_*_access_index intrinsics. std::map BaseAICalls; bool doTransformation(Module &M); @@ -125,7 +152,7 @@ Value *computeBaseAndAccessKey(CallInst *Call, std::string &AccessKey, uint32_t Kind, MDNode *&BaseMeta); - bool getAccessIndex(const Value *IndexValue, uint64_t &AccessIndex); + uint64_t getAccessIndex(const Value *IndexValue); bool transformGEPChain(Module &M, CallInst *Call, uint32_t Kind); }; } // End anonymous namespace @@ -228,6 +255,11 @@ ->getZExtValue(); return true; } + if (GV->getName().startswith("llvm.preserve.bitfield.access.index")) { + Kind = BPFPreserveBitFieldAI; + TypeMeta = nullptr; + return true; + } return false; } @@ -307,6 +339,9 @@ uint32_t ParentAI, const MDNode *ChildType) { const DIType *PType = stripQualifiers(cast(ParentType)); + if (!ChildType) + return true; // Bitfield Parent, no type comparison needed. + const DIType *CType = stripQualifiers(cast(ChildType)); // Child is a derived/pointer type, which is due to type casting. @@ -465,14 +500,11 @@ } /// Get access index from the preserve_*_access_index intrinsic calls. 
-bool BPFAbstractMemberAccess::getAccessIndex(const Value *IndexValue, - uint64_t &AccessIndex) { +uint64_t BPFAbstractMemberAccess::getAccessIndex(const Value *IndexValue) { const ConstantInt *CV = dyn_cast(IndexValue); - if (!CV) - return false; + assert(CV); - AccessIndex = CV->getValue().getZExtValue(); - return true; + return CV->getValue().getZExtValue(); } /// Compute the base of the whole preserve_*_access_index chains, i.e., the base @@ -504,7 +536,7 @@ // int a[10][20]; ... __builtin_preserve_access_index(&a[2][3]) ... // we will skip them. uint32_t FirstIndex = 0; - uint32_t AccessOffset = 0; + uint64_t PatchImm = 0; // AccessOffset or the BitField Info Encoding while (CallStack.size()) { auto StackElem = CallStack.top(); Call = StackElem.first; @@ -519,17 +551,18 @@ // struct or union type TypeName = Ty->getName(); TypeMeta = Ty; - AccessOffset += FirstIndex * Ty->getSizeInBits() >> 3; + PatchImm += FirstIndex * Ty->getSizeInBits() >> 3; break; } + assert(Kind == BPFPreserveBitFieldAI && + "Reaching preserve_bitfield_access_index and cannot find top type name"); + // Array entries will always be consumed for accumulative initial index. CallStack.pop(); // BPFPreserveArrayAI - uint64_t AccessIndex; - if (!getAccessIndex(Call->getArgOperand(2), AccessIndex)) - return nullptr; + uint64_t AccessIndex = getAccessIndex(Call->getArgOperand(2)); DIType *BaseTy = nullptr; bool CheckElemType = false; @@ -569,7 +602,7 @@ else TypeName = CTy->getName(); TypeMeta = CTy; - AccessOffset += FirstIndex * CTy->getSizeInBits() >> 3; + PatchImm += FirstIndex * CTy->getSizeInBits() >> 3; break; } } @@ -584,11 +617,12 @@ Kind = StackElem.second; CallStack.pop(); + if (Kind == BPFPreserveBitFieldAI) + break; + // Access Index - uint64_t AccessIndex; uint32_t ArgIndex = (Kind == BPFPreserveUnionAI) ? 
1 : 2; - if (!getAccessIndex(Call->getArgOperand(ArgIndex), AccessIndex)) - return nullptr; + uint64_t AccessIndex = getAccessIndex(Call->getArgOperand(ArgIndex)); AccessKey += ":" + std::to_string(AccessIndex); MDNode *MDN = Call->getMetadata(LLVMContext::MD_preserve_access_index); @@ -597,17 +631,36 @@ uint32_t Tag = CTy->getTag(); if (Tag == dwarf::DW_TAG_structure_type) { auto *MemberTy = cast(CTy->getElements()[AccessIndex]); - AccessOffset += MemberTy->getOffsetInBits() >> 3; + if (!MemberTy->isBitField()) { + PatchImm += MemberTy->getOffsetInBits() >> 3; + } else { + // BitField, PatchImm will be an encoding for bitfield information. + PatchImm = (MemberTy->getOffsetInBits() << 16) | MemberTy->getSizeInBits(); + const DIType *BaseTy = stripQualifiers(MemberTy->getBaseType()); + // Base Type can be integer type or enum + const auto *BTy = dyn_cast(BaseTy); + while (!BTy) { + const auto *CompTy = dyn_cast(BaseTy); + assert(CompTy); + assert(CompTy->getTag() == dwarf::DW_TAG_enumeration_type); + BaseTy = stripQualifiers(CompTy->getBaseType()); + BTy = dyn_cast(BaseTy); + } + + uint32_t Encoding = BTy->getEncoding(); + if (Encoding == dwarf::DW_ATE_signed || Encoding == dwarf::DW_ATE_signed_char) + PatchImm |= 1 << 15; + } } else if (Tag == dwarf::DW_TAG_array_type) { auto *EltTy = stripQualifiers(CTy->getBaseType()); - AccessOffset += AccessIndex * calcArraySize(CTy, 1) * + PatchImm += AccessIndex * calcArraySize(CTy, 1) * EltTy->getSizeInBits() >> 3; } } - // Access key is the type name + access string, uniquely identifying + // Access key is the type name + patch immediate string, uniquely identifying // one kernel memory access. - AccessKey = TypeName + ":" + std::to_string(AccessOffset) + "$" + AccessKey; + AccessKey = TypeName + ":" + std::to_string(PatchImm) + "$" + AccessKey; return Base; } @@ -646,25 +699,34 @@ GV = GEPGlobals[AccessKey]; } - // Load the global variable. 
- auto *LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV); - BB->getInstList().insert(Call->getIterator(), LDInst); + if (Kind == BPFPreserveBitFieldAI) { + // Load the global variable. + auto *LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV); + BB->getInstList().insert(Call->getIterator(), LDInst); + + Call->replaceAllUsesWith(LDInst); + Call->eraseFromParent(); + } else { + // Load the global variable. + auto *LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV); + BB->getInstList().insert(Call->getIterator(), LDInst); - // Generate a BitCast - auto *BCInst = new BitCastInst(Base, Type::getInt8PtrTy(BB->getContext())); - BB->getInstList().insert(Call->getIterator(), BCInst); + // Generate a BitCast + auto *BCInst = new BitCastInst(Base, Type::getInt8PtrTy(BB->getContext())); + BB->getInstList().insert(Call->getIterator(), BCInst); - // Generate a GetElementPtr - auto *GEP = GetElementPtrInst::Create(Type::getInt8Ty(BB->getContext()), - BCInst, LDInst); - BB->getInstList().insert(Call->getIterator(), GEP); + // Generate a GetElementPtr + auto *GEP = GetElementPtrInst::Create(Type::getInt8Ty(BB->getContext()), + BCInst, LDInst); + BB->getInstList().insert(Call->getIterator(), GEP); - // Generate a BitCast - auto *BCInst2 = new BitCastInst(GEP, Call->getType()); - BB->getInstList().insert(Call->getIterator(), BCInst2); + // Generate a BitCast + auto *BCInst2 = new BitCastInst(GEP, Call->getType()); + BB->getInstList().insert(Call->getIterator(), BCInst2); - Call->replaceAllUsesWith(BCInst2); - Call->eraseFromParent(); + Call->replaceAllUsesWith(BCInst2); + Call->eraseFromParent(); + } return true; } diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h --- a/llvm/lib/Target/BPF/BTFDebug.h +++ b/llvm/lib/Target/BPF/BTFDebug.h @@ -254,7 +254,7 @@ StringMap> FileContent; std::map> DataSecEntries; std::vector StructTypes; - std::map AccessOffsets; + std::map PatchImms; std::map>> FixupDerivedTypes; diff --git 
a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -966,14 +966,14 @@ size_t FirstDollar = AccessPattern.find_first_of('$'); size_t FirstColon = AccessPattern.find_first_of(':'); StringRef IndexPattern = AccessPattern.substr(FirstDollar + 1); - StringRef OffsetStr = AccessPattern.substr(FirstColon + 1, + StringRef PatchImmStr = AccessPattern.substr(FirstColon + 1, FirstDollar - FirstColon); BTFOffsetReloc OffsetReloc; OffsetReloc.Label = ORSym; OffsetReloc.OffsetNameOff = addString(IndexPattern); OffsetReloc.TypeID = RootId; - AccessOffsets[AccessPattern.str()] = std::stoi(OffsetStr); + PatchImms[AccessPattern.str()] = std::stoull(PatchImmStr); OffsetRelocTable[SecNameOff].push_back(OffsetReloc); } @@ -1157,10 +1157,13 @@ MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index); DIType *Ty = dyn_cast(MDN); std::string TypeName = Ty->getName(); - int64_t Imm = AccessOffsets[GVar->getName().str()]; + uint64_t Imm = PatchImms[GVar->getName().str()]; - // Emit "mov ri, " for abstract member accesses. - OutMI.setOpcode(BPF::MOV_ri); + // Emit "mov/ld_imm64 ri, " for patched immediate. + if (Imm > UINT_MAX) + OutMI.setOpcode(BPF::LD_imm64); + else + OutMI.setOpcode(BPF::MOV_ri); OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); OutMI.addOperand(MCOperand::createImm(Imm)); return true;