Index: include/clang/Basic/Builtins.def =================================================================== --- include/clang/Basic/Builtins.def +++ include/clang/Basic/Builtins.def @@ -1244,6 +1244,25 @@ BUILTIN(__builtin___get_unsafe_stack_start, "v*", "Fn") BUILTIN(__builtin___get_unsafe_stack_ptr, "v*", "Fn") +// Nontemporal loads/stores builtins +BUILTIN(__builtin_nontemporal_store, "v.", "t") +BUILTIN(__builtin_nontemporal_store_1, "vcc*.", "") +BUILTIN(__builtin_nontemporal_store_2, "vss*.", "") +BUILTIN(__builtin_nontemporal_store_4, "vii*.", "") +BUILTIN(__builtin_nontemporal_store_8, "vLLiLLi*.", "") +BUILTIN(__builtin_nontemporal_store_16, "vLLLiLLLi*.", "") +BUILTIN(__builtin_nontemporal_store_f, "vff*.", "") +BUILTIN(__builtin_nontemporal_store_d, "vdd*.", "") + +BUILTIN(__builtin_nontemporal_load, "v.", "t") +BUILTIN(__builtin_nontemporal_load_1, "cc*.", "") +BUILTIN(__builtin_nontemporal_load_2, "ss*.", "") +BUILTIN(__builtin_nontemporal_load_4, "ii*.", "") +BUILTIN(__builtin_nontemporal_load_8, "LLiLLi*.", "") +BUILTIN(__builtin_nontemporal_load_16, "LLLiLLLi*.", "") +BUILTIN(__builtin_nontemporal_load_f, "ff*.", "") +BUILTIN(__builtin_nontemporal_load_d, "dd*.", "") + #undef BUILTIN #undef LIBBUILTIN #undef LANGBUILTIN Index: include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- include/clang/Basic/DiagnosticSemaKinds.td +++ include/clang/Basic/DiagnosticSemaKinds.td @@ -6193,6 +6193,15 @@ "atomic %select{load|store}0 requires runtime support that is not " "available for this target">; +def err_nontemporal_builtin_must_be_pointer : Error< + "address argument to nontemporal builtin must be a pointer (%0 invalid)">; +def err_nontemporal_builtin_must_be_pointer_intfltptr : Error< + "address argument to nontemporal builtin must be a pointer to integer, float " + "or pointer (%0 invalid)">; +def err_nontemporal_builtin_pointer_size : Error< + "address argument to nontemporal builtin must be a 
pointer to 1,2,4,8 or 16 " "byte type (%0 invalid)">; + def err_deleted_function_use : Error<"attempt to use a deleted function">; def err_kern_type_not_void_return : Error< Index: include/clang/Sema/Sema.h =================================================================== --- include/clang/Sema/Sema.h +++ include/clang/Sema/Sema.h @@ -8831,6 +8831,7 @@ bool SemaBuiltinLongjmp(CallExpr *TheCall); bool SemaBuiltinSetjmp(CallExpr *TheCall); ExprResult SemaBuiltinAtomicOverloaded(ExprResult TheCallResult); + ExprResult SemaBuiltinNontemporalOverloaded(ExprResult TheCallResult); ExprResult SemaAtomicOpsOverloaded(ExprResult TheCallResult, AtomicExpr::AtomicOp Op); bool SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum, Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -111,6 +111,44 @@ return EmitFromInt(CGF, Result, T, ValueType); } +static Value *MakeNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { + Value *Val = CGF.EmitScalarExpr(E->getArg(0)); + Value *Address = CGF.EmitScalarExpr(E->getArg(1)); + + llvm::Type *ValueTy = Val->getType(); + unsigned Align = CGF.getContext() + .getTypeSizeInChars(E->getArg(1)->getType()->getPointeeType()) + .getQuantity(); + + llvm::MDNode *Node = + llvm::MDNode::get(CGF.getLLVMContext(), + llvm::ConstantAsMetadata::get(CGF.Builder.getInt32(1))); + + // Convert the type of the pointer to a pointer to the stored type. 
+ Value *BC = CGF.Builder.CreateBitCast( + Address, llvm::PointerType::getUnqual(ValueTy), "cast"); + StoreInst *SI = CGF.Builder.CreateStore(Val, BC); + SI->setMetadata(CGF.CGM.getModule().getMDKindID("nontemporal"), Node); + SI->setAlignment(Align); + return nullptr; +} + +static Value *MakeNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) { + Value *Address = CGF.EmitScalarExpr(E->getArg(0)); + unsigned Align = CGF.getContext() + .getTypeSizeInChars(E->getArg(0)->getType()->getPointeeType()) + .getQuantity(); + + llvm::MDNode *Node = + llvm::MDNode::get(CGF.getLLVMContext(), + llvm::ConstantAsMetadata::get(CGF.Builder.getInt32(1))); + + LoadInst *LI = CGF.Builder.CreateLoad(Address, "ntload"); + LI->setMetadata(CGF.CGM.getModule().getMDKindID("nontemporal"), Node); + LI->setAlignment(Align); + return LI; +} + static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E) { @@ -1004,6 +1042,8 @@ case Builtin::BI__sync_lock_test_and_set: case Builtin::BI__sync_lock_release: case Builtin::BI__sync_swap: + case Builtin::BI__builtin_nontemporal_load: + case Builtin::BI__builtin_nontemporal_store: llvm_unreachable("Shouldn't make it through sema"); case Builtin::BI__sync_fetch_and_add_1: case Builtin::BI__sync_fetch_and_add_2: @@ -1153,6 +1193,22 @@ return RValue::get(nullptr); } + case Builtin::BI__builtin_nontemporal_load_1: + case Builtin::BI__builtin_nontemporal_load_2: + case Builtin::BI__builtin_nontemporal_load_4: + case Builtin::BI__builtin_nontemporal_load_8: + case Builtin::BI__builtin_nontemporal_load_16: + case Builtin::BI__builtin_nontemporal_load_f: + case Builtin::BI__builtin_nontemporal_load_d: + return RValue::get(MakeNontemporalLoad(*this, E)); + case Builtin::BI__builtin_nontemporal_store_1: + case Builtin::BI__builtin_nontemporal_store_2: + case Builtin::BI__builtin_nontemporal_store_4: + case Builtin::BI__builtin_nontemporal_store_8: + case Builtin::BI__builtin_nontemporal_store_16: + case 
Builtin::BI__builtin_nontemporal_store_f: + case Builtin::BI__builtin_nontemporal_store_d: + return RValue::get(MakeNontemporalStore(*this, E)); case Builtin::BI__c11_atomic_is_lock_free: case Builtin::BI__atomic_is_lock_free: { // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the Index: lib/Sema/SemaChecking.cpp =================================================================== --- lib/Sema/SemaChecking.cpp +++ lib/Sema/SemaChecking.cpp @@ -440,6 +440,9 @@ case Builtin::BI__sync_swap_8: case Builtin::BI__sync_swap_16: return SemaBuiltinAtomicOverloaded(TheCallResult); + case Builtin::BI__builtin_nontemporal_load: + case Builtin::BI__builtin_nontemporal_store: + return SemaBuiltinNontemporalOverloaded(TheCallResult); #define BUILTIN(ID, TYPE, ATTRS) #define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \ case Builtin::BI##ID: \ @@ -2209,6 +2212,152 @@ return TheCallResult; } +/// SemaBuiltinNontemporalOverloaded - We have a call to +/// __builtin_nontemporal_store or __builtin_nontemporal_load, which is an +/// overloaded function based on the pointer type of its last argument. +/// The main ActOnCallExpr routines have already promoted the types of +/// arguments because all of these calls are prototyped as void(...). +/// +/// This function goes through and does final semantic checking for these +/// builtins. +ExprResult Sema::SemaBuiltinNontemporalOverloaded(ExprResult TheCallResult) { + CallExpr *TheCall = (CallExpr *)TheCallResult.get(); + DeclRefExpr *DRE = + cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); + FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl()); + unsigned BuiltinID = FDecl->getBuiltinID(); + assert((BuiltinID == Builtin::BI__builtin_nontemporal_store || + BuiltinID == Builtin::BI__builtin_nontemporal_load) && + "Unexpected nontemporal load/store builtin!"); + bool isStore = BuiltinID == Builtin::BI__builtin_nontemporal_store; + unsigned numArgs = isStore ? 2 : 1; + + // Ensure that we have at least one argument to do type inference from. 
+ if (TheCall->getNumArgs() < numArgs) { + Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args_at_least) + << 0 << numArgs << TheCall->getNumArgs() + << TheCall->getCallee()->getSourceRange(); + return ExprError(); + } + + // Inspect the last argument of the nontemporal builtin. This should always + // be a pointer type, whose element is an integral scalar, float scalar, or + // pointer type. + // Because it is a pointer type, we don't have to worry about any implicit + // casts here. + Expr *AddressArg = TheCall->getArg(numArgs - 1); + ExprResult AddressArgResult = + DefaultFunctionArrayLvalueConversion(AddressArg); + if (AddressArgResult.isInvalid()) + return ExprError(); + AddressArg = AddressArgResult.get(); + TheCall->setArg(numArgs - 1, AddressArg); + + const PointerType *pointerType = AddressArg->getType()->getAs<PointerType>(); + if (!pointerType) { + Diag(DRE->getLocStart(), diag::err_nontemporal_builtin_must_be_pointer) + << AddressArg->getType() << AddressArg->getSourceRange(); + return ExprError(); + } + + QualType ValType = pointerType->getPointeeType(); + if (!ValType->isIntegerType() && !ValType->isAnyPointerType() && + !ValType->isBlockPointerType() && !ValType->isFloatingType()) { + Diag(DRE->getLocStart(), + diag::err_nontemporal_builtin_must_be_pointer_intfltptr) + << AddressArg->getType() << AddressArg->getSourceRange(); + return ExprError(); + } + + // Strip any qualifiers off ValType. + ValType = ValType.getUnqualifiedType(); + +#define BUILTIN_ROW(x) \ + { Builtin::BI##x##_1, Builtin::BI##x##_2, Builtin::BI##x##_4, \ + Builtin::BI##x##_8, Builtin::BI##x##_16, \ + Builtin::BI##x##_f, Builtin::BI##x##_d } + + static const unsigned BuiltinIndices[][7] = { + BUILTIN_ROW(__builtin_nontemporal_store), + BUILTIN_ROW(__builtin_nontemporal_load) + }; +#undef BUILTIN_ROW + + // Determine the index of the size. 
+ unsigned SizeIndex; + if (ValType->isFloatingType()) { + switch (Context.getTypeSizeInChars(ValType).getQuantity()) { + case 4: SizeIndex = 5; break; + case 8: SizeIndex = 6; break; + default: + Diag(DRE->getLocStart(), diag::err_nontemporal_builtin_pointer_size) + << AddressArg->getType() << AddressArg->getSourceRange(); + return ExprError(); + } + } else { + switch (Context.getTypeSizeInChars(ValType).getQuantity()) { + case 1: SizeIndex = 0; break; + case 2: SizeIndex = 1; break; + case 4: SizeIndex = 2; break; + case 8: SizeIndex = 3; break; + case 16: SizeIndex = 4; break; + default: + Diag(DRE->getLocStart(), diag::err_nontemporal_builtin_pointer_size) + << AddressArg->getType() << AddressArg->getSourceRange(); + return ExprError(); + } + } + // Get the decl for the concrete builtin from this, we can tell what the + // concrete integer type we should convert to is. + unsigned NewBuiltinID = BuiltinIndices[isStore ? 0 : 1][SizeIndex]; + const char *NewBuiltinName = Context.BuiltinInfo.getName(NewBuiltinID); + + // Perform builtin lookup to avoid redeclaring it. + DeclarationName DN(&Context.Idents.get(NewBuiltinName)); + LookupResult Res(*this, DN, DRE->getLocStart(), LookupOrdinaryName); + LookupName(Res, TUScope, /*AllowBuiltinCreation=*/true); + assert(Res.getFoundDecl()); + FunctionDecl *NewBuiltinDecl = dyn_cast<FunctionDecl>(Res.getFoundDecl()); + if (!NewBuiltinDecl) + return ExprError(); + + // For stores convert the value (the first argument) to the type of the + // pointer (the second argument). + if (isStore) { + ExprResult Arg = TheCall->getArg(0); + InitializedEntity Entity = InitializedEntity::InitializeParameter( + Context, ValType, /*consume*/ false); + Arg = PerformCopyInitialization(Entity, SourceLocation(), Arg); + if (Arg.isInvalid()) + return ExprError(); + + TheCall->setArg(0, Arg.get()); + } + + ASTContext &Context = this->getASTContext(); + + // Create a new DeclRefExpr to refer to the new decl. 
+ DeclRefExpr *NewDRE = DeclRefExpr::Create( + Context, DRE->getQualifierLoc(), SourceLocation(), NewBuiltinDecl, + /*enclosing*/ false, DRE->getLocation(), Context.BuiltinFnTy, + DRE->getValueKind()); + + // Set the callee in the CallExpr. + // FIXME: This loses syntactic information. + QualType CalleePtrTy = Context.getPointerType(NewBuiltinDecl->getType()); + ExprResult PromotedCall = + ImpCastExprToType(NewDRE, CalleePtrTy, CK_BuiltinFnToFnPtr); + TheCall->setCallee(PromotedCall.get()); + + // For loads change the result type of the call to match the original value + // type. This is arbitrary, but the codegen for these builtins is designed to + // handle it gracefully. + if (!isStore) + TheCall->setType(ValType); + + return TheCallResult; +} + /// CheckObjCString - Checks that the argument to the builtin /// CFString constructor is correct /// Note: It might also make sense to do the UTF-16 conversion here (would Index: test/CodeGen/Nontemporal.c =================================================================== --- /dev/null +++ test/CodeGen/Nontemporal.c @@ -0,0 +1,56 @@ +// Test frontend handling of nontemporal builtins. 
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - | FileCheck %s + +signed char sc; +unsigned char uc; +signed short ss; +unsigned short us; +signed int si; +unsigned int ui; +signed long long sll; +unsigned long long ull; +float f1, f2; +double d1, d2; + +void test_explicit_datasize (void) // CHECK-LABEL: define void @test_explicit_datasize +{ + __builtin_nontemporal_store(f1, &f2); // CHECK: store float{{.*}}!nontemporal + __builtin_nontemporal_store_f(f1, &f2); // CHECK: store float{{.*}}!nontemporal + __builtin_nontemporal_store_1(sc, &f2); // CHECK: store i8{{.*}}!nontemporal + __builtin_nontemporal_store_4(d1, &f2); // CHECK: store i32{{.*}}!nontemporal + + f2 = __builtin_nontemporal_load(&f1); // CHECK: load float{{.*}}!nontemporal + f2 = __builtin_nontemporal_load_f(&f1); // CHECK: load float{{.*}}!nontemporal + sc = __builtin_nontemporal_load_1(&f2); // CHECK: load i8{{.*}}!nontemporal + d1 = __builtin_nontemporal_load_1(&f2); // CHECK: [[D1:%[a-z0-9._]+]] = load i8{{.*}}!nontemporal + // CHECK: sitofp i8 [[D1]] to double + si = __builtin_nontemporal_load_4(&f2); // CHECK: load i32{{.*}}!nontemporal + sll = __builtin_nontemporal_load_8(&f2); // CHECK: load i64{{.*}}!nontemporal + si = __builtin_nontemporal_load(&f1); // CHECK: [[SI:%[a-z0-9._]+]] = load float{{.*}}!nontemporal + // CHECK: fptosi float [[SI]] to i32 +} + +void test_all_sizes (void) // CHECK-LABEL: define void @test_all_sizes +{ + __builtin_nontemporal_store(1, &uc); // CHECK: store i8{{.*}}!nontemporal + __builtin_nontemporal_store(1, &sc); // CHECK: store i8{{.*}}!nontemporal + __builtin_nontemporal_store(1, &us); // CHECK: store i16{{.*}}!nontemporal + __builtin_nontemporal_store(1, &ss); // CHECK: store i16{{.*}}!nontemporal + __builtin_nontemporal_store(1, &ui); // CHECK: store i32{{.*}}!nontemporal + __builtin_nontemporal_store(1, &si); // CHECK: store i32{{.*}}!nontemporal + __builtin_nontemporal_store(1, &ull); // CHECK: store i64{{.*}}!nontemporal + 
__builtin_nontemporal_store(1, &sll); // CHECK: store i64{{.*}}!nontemporal + __builtin_nontemporal_store(1.0, &f1); // CHECK: store float{{.*}}!nontemporal + __builtin_nontemporal_store(1.0, &d1); // CHECK: store double{{.*}}!nontemporal + + uc = __builtin_nontemporal_load(&sc); // CHECK: load i8{{.*}}!nontemporal + sc = __builtin_nontemporal_load(&uc); // CHECK: load i8{{.*}}!nontemporal + us = __builtin_nontemporal_load(&ss); // CHECK: load i16{{.*}}!nontemporal + ss = __builtin_nontemporal_load(&us); // CHECK: load i16{{.*}}!nontemporal + ui = __builtin_nontemporal_load(&si); // CHECK: load i32{{.*}}!nontemporal + si = __builtin_nontemporal_load(&ui); // CHECK: load i32{{.*}}!nontemporal + ull = __builtin_nontemporal_load(&sll); // CHECK: load i64{{.*}}!nontemporal + sll = __builtin_nontemporal_load(&ull); // CHECK: load i64{{.*}}!nontemporal + f1 = __builtin_nontemporal_load(&f2); // CHECK: load float{{.*}}!nontemporal + d1 = __builtin_nontemporal_load(&d2); // CHECK: load double{{.*}}!nontemporal +}