Index: include/clang/AST/Type.h =================================================================== --- include/clang/AST/Type.h +++ include/clang/AST/Type.h @@ -1913,6 +1913,13 @@ /// be lost by canonicalization and desugaring. Optional getNullability(const ASTContext &context) const; + /// Determine whether the given type carries the non-temporal attribute. + /// + /// The non-temporal attribute is only captured as sugar within the type + /// system, not as part of the canonical type, so it will be lost by + /// canonicalization and desugaring. + bool hasNonTemporalAttr(const ASTContext &context) const; + /// Determine whether the given type can have a nullability /// specifier applied to it, i.e., if it is any kind of pointer type /// or a dependent type that could instantiate to any kind of @@ -3611,6 +3618,7 @@ attr_nullable, attr_null_unspecified, attr_objc_kindof, + attr_nontemporal, }; private: Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -1006,6 +1006,11 @@ let Documentation = [TypeNullUnspecifiedDocs]; } +def TypeNonTemporal : TypeAttr { + let Spellings = [GCC<"nontemporal">]; + let Documentation = [Undocumented]; +} + def ObjCKindOf : TypeAttr { let Spellings = [Keyword<"__kindof">]; let Documentation = [Undocumented]; Index: lib/AST/Type.cpp =================================================================== --- lib/AST/Type.cpp +++ lib/AST/Type.cpp @@ -2887,6 +2887,7 @@ case attr_nullable: case attr_null_unspecified: case attr_objc_kindof: + case attr_nontemporal: return false; case attr_pcs: @@ -3317,6 +3318,25 @@ return LV; } +bool Type::hasNonTemporalAttr(const ASTContext &context) const { + QualType type(this, 0); + do { + // Check whether this is an attributed type carrying the non-temporal + // attribute. 
+ if (auto attributed = dyn_cast(type.getTypePtr())) { + if (attributed->getAttrKind() == AttributedType::attr_nontemporal) + return true; + } + + // Desugar the type. If desugaring does nothing, we're done. + QualType desugared = type.getSingleStepDesugaredType(context); + if (desugared.getTypePtr() == type.getTypePtr()) + return false; + + type = desugared; + } while (true); +} + Optional Type::getNullability(const ASTContext &context) const { QualType type(this, 0); do { Index: lib/AST/TypePrinter.cpp =================================================================== --- lib/AST/TypePrinter.cpp +++ lib/AST/TypePrinter.cpp @@ -1212,6 +1212,11 @@ OS << ')'; break; + case AttributedType::attr_nontemporal: { + OS << "nontemporal"; + break; + } + case AttributedType::attr_vector_size: { OS << "__vector_size__("; if (const VectorType *vector =T->getEquivalentType()->getAs()) { Index: lib/CodeGen/CGExpr.cpp =================================================================== --- lib/CodeGen/CGExpr.cpp +++ lib/CodeGen/CGExpr.cpp @@ -1059,7 +1059,8 @@ return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(), lvalue.getAlignment().getQuantity(), lvalue.getType(), Loc, lvalue.getTBAAInfo(), - lvalue.getTBAABaseType(), lvalue.getTBAAOffset()); + lvalue.getTBAABaseType(), lvalue.getTBAAOffset(), + lvalue.getNT()); } static bool hasBooleanRepresentation(QualType Ty) { @@ -1124,7 +1125,8 @@ SourceLocation Loc, llvm::MDNode *TBAAInfo, QualType TBAABaseType, - uint64_t TBAAOffset) { + uint64_t TBAAOffset, + bool isNonTemporal) { // For better performance, handle vector loads differently. 
if (Ty->isVectorType()) { llvm::Value *V; @@ -1168,6 +1170,16 @@ Load->setVolatile(true); if (Alignment) Load->setAlignment(Alignment); + if (isNonTemporal) { + llvm::LLVMContext &C = Load->getContext(); + llvm::Module *M = Load->getModule(); + SmallVector Elts; + Elts.push_back(llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), 1))); + llvm::MDNode *Node = llvm::MDNode::get(C, Elts); + Load->setMetadata(M->getMDKindID("nontemporal"), Node); + } + if (TBAAInfo) { llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset); @@ -1239,7 +1251,8 @@ bool Volatile, unsigned Alignment, QualType Ty, llvm::MDNode *TBAAInfo, bool isInit, QualType TBAABaseType, - uint64_t TBAAOffset) { + uint64_t TBAAOffset, + bool isNonTemporal) { // Handle vectors differently to get better performance. if (Ty->isVectorType()) { @@ -1280,6 +1293,16 @@ llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile); if (Alignment) Store->setAlignment(Alignment); + if (isNonTemporal) { + llvm::LLVMContext &C = Store->getContext(); + llvm::Module *M = Store->getModule(); + SmallVector Elts; + Elts.push_back(llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), 1))); + llvm::MDNode *Node = llvm::MDNode::get(C, Elts); + Store->setMetadata(M->getMDKindID("nontemporal"), Node); + } + if (TBAAInfo) { llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset); @@ -1293,7 +1316,7 @@ EmitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(), lvalue.getAlignment().getQuantity(), lvalue.getType(), lvalue.getTBAAInfo(), isInit, lvalue.getTBAABaseType(), - lvalue.getTBAAOffset()); + lvalue.getTBAAOffset(), lvalue.getNT()); } /// EmitLoadOfLValue - Given an expression that represents a value lvalue, this @@ -1323,6 +1346,16 @@ llvm::LoadInst *Load = Builder.CreateLoad(LV.getVectorAddr(), LV.isVolatileQualified()); Load->setAlignment(LV.getAlignment().getQuantity()); + if (LV.getNT()) 
{ + llvm::LLVMContext &C = Load->getContext(); + llvm::Module *M = Load->getModule(); + SmallVector Elts; + Elts.push_back(llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), 1))); + llvm::MDNode *Node = llvm::MDNode::get(C, Elts); + Load->setMetadata(M->getMDKindID("nontemporal"), Node); + } + return RValue::get(Builder.CreateExtractElement(Load, LV.getVectorIdx(), "vecext")); } @@ -2034,7 +2067,10 @@ QualType T = E->getSubExpr()->getType()->getPointeeType(); assert(!T.isNull() && "CodeGenFunction::EmitUnaryOpLValue: Illegal type"); - LValue LV = MakeNaturalAlignAddrLValue(EmitScalarExpr(E->getSubExpr()), T); + LValue LV = MakeNaturalAlignAddrLValue( + EmitScalarExpr(E->getSubExpr()), T, + E->getSubExpr()->IgnoreImpCasts()->getType()->hasNonTemporalAttr( + getContext())); LV.getQuals().setAddressSpace(ExprTy.getAddressSpace()); // We should not generate __weak write barrier on indirect reference @@ -2538,15 +2574,18 @@ assert(!T.isNull() && "CodeGenFunction::EmitArraySubscriptExpr(): Illegal base type"); + bool isNonTemporal = + E->getBase()->IgnoreImpCasts()->getType()->hasNonTemporalAttr( + getContext()); // Limit the alignment to that of the result type. LValue LV; if (!ArrayAlignment.isZero()) { CharUnits Align = getContext().getTypeAlignInChars(T); ArrayAlignment = std::min(Align, ArrayAlignment); - LV = MakeAddrLValue(Address, T, ArrayAlignment); + LV = MakeAddrLValue(Address, T, ArrayAlignment, isNonTemporal); } else { - LV = MakeNaturalAlignAddrLValue(Address, T); + LV = MakeNaturalAlignAddrLValue(Address, T, isNonTemporal); } LV.getQuals().setAddressSpace(E->getBase()->getType().getAddressSpace()); Index: lib/CodeGen/CGValue.h =================================================================== --- lib/CodeGen/CGValue.h +++ lib/CodeGen/CGValue.h @@ -137,6 +137,9 @@ // this is the alignment of the whole vector.) int64_t Alignment; + // Whether accesses through this lvalue are emitted as non-temporal. 
+ bool isNonTemporal : 1; + // objective-c's ivar bool Ivar:1; @@ -170,13 +173,16 @@ private: void Initialize(QualType Type, Qualifiers Quals, CharUnits Alignment, - llvm::MDNode *TBAAInfo = nullptr) { + llvm::MDNode *TBAAInfo = nullptr, + bool isNonTemporal = false) { this->Type = Type; this->Quals = Quals; this->Alignment = Alignment.getQuantity(); assert(this->Alignment == Alignment.getQuantity() && "Alignment exceeds allowed max!"); + this->isNonTemporal = isNonTemporal; + // Initialize Objective-C flags. this->Ivar = this->ObjIsArray = this->NonGC = this->GlobalObjCRef = false; this->ImpreciseLifetime = false; @@ -260,6 +266,8 @@ CharUnits getAlignment() const { return CharUnits::fromQuantity(Alignment); } void setAlignment(CharUnits A) { Alignment = A.getQuantity(); } + bool getNT() const { return isNonTemporal; } + void setNT(bool nt) { isNonTemporal = nt; } // simple lvalue llvm::Value *getAddress() const { assert(isSimple()); return V; } @@ -294,7 +302,8 @@ static LValue MakeAddr(llvm::Value *address, QualType type, CharUnits alignment, ASTContext &Context, - llvm::MDNode *TBAAInfo = nullptr) { + llvm::MDNode *TBAAInfo = nullptr, + bool isNonTemporal = false) { Qualifiers qs = type.getQualifiers(); qs.setObjCGCAttr(Context.getObjCGCAttrKind(type)); @@ -302,12 +311,12 @@ R.LVType = Simple; assert(address->getType()->isPointerTy()); R.V = address; - R.Initialize(type, qs, alignment, TBAAInfo); + R.Initialize(type, qs, alignment, TBAAInfo, isNonTemporal); return R; } - static LValue MakeVectorElt(llvm::Value *Vec, llvm::Value *Idx, - QualType type, CharUnits Alignment) { + static LValue MakeVectorElt(llvm::Value *Vec, llvm::Value *Idx, QualType type, + CharUnits Alignment, bool isNonTemporal = false) { LValue R; R.LVType = VectorElt; R.V = Vec; @@ -317,7 +326,8 @@ } static LValue MakeExtVectorElt(llvm::Value *Vec, llvm::Constant *Elts, - QualType type, CharUnits Alignment) { + QualType type, CharUnits Alignment, + bool isNonTemporal = false) { LValue R; 
R.LVType = ExtVectorElt; R.V = Vec; Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -1497,12 +1497,14 @@ //===--------------------------------------------------------------------===// LValue MakeAddrLValue(llvm::Value *V, QualType T, - CharUnits Alignment = CharUnits()) { + CharUnits Alignment = CharUnits(), + bool isNonTemporal = false) { return LValue::MakeAddr(V, T, Alignment, getContext(), - CGM.getTBAAInfo(T)); + CGM.getTBAAInfo(T), isNonTemporal); } - LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T); + LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T, + bool isNonTemporal = false); /// CreateTempAlloca - This creates a alloca and inserts it into the entry /// block. The caller is responsible for setting an appropriate alignment on @@ -2343,7 +2345,8 @@ SourceLocation Loc, llvm::MDNode *TBAAInfo = nullptr, QualType TBAABaseTy = QualType(), - uint64_t TBAAOffset = 0); + uint64_t TBAAOffset = 0, + bool isNonTemporal = false); /// EmitLoadOfScalar - Load a scalar value from an address, taking /// care to appropriately convert from the memory representation to @@ -2358,7 +2361,7 @@ bool Volatile, unsigned Alignment, QualType Ty, llvm::MDNode *TBAAInfo = nullptr, bool isInit = false, QualType TBAABaseTy = QualType(), - uint64_t TBAAOffset = 0); + uint64_t TBAAOffset = 0, bool isNonTemporal = false); /// EmitStoreOfScalar - Store a scalar value to an address, taking /// care to appropriately convert from the memory representation to Index: lib/CodeGen/CodeGenFunction.cpp =================================================================== --- lib/CodeGen/CodeGenFunction.cpp +++ lib/CodeGen/CodeGenFunction.cpp @@ -91,7 +91,8 @@ } } -LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) { +LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T, + bool isNonTemporal) { 
CharUnits Alignment; if (CGM.getCXXABI().isTypeInfoCalculable(T)) { Alignment = getContext().getTypeAlignInChars(T); @@ -100,7 +101,8 @@ !getContext().isAlignmentRequired(T)) Alignment = CharUnits::fromQuantity(MaxAlign); } - return LValue::MakeAddr(V, T, Alignment, getContext(), CGM.getTBAAInfo(T)); + return LValue::MakeAddr(V, T, Alignment, getContext(), CGM.getTBAAInfo(T), + isNonTemporal); } llvm::Type *CodeGenFunction::ConvertTypeForMem(QualType T) { Index: lib/Sema/SemaType.cpp =================================================================== --- lib/Sema/SemaType.cpp +++ lib/Sema/SemaType.cpp @@ -644,6 +644,7 @@ // Objective-C __kindof does not get distributed. case AttributeList::AT_ObjCKindOf: + case AttributeList::AT_TypeNonTemporal: continue; default: @@ -4436,6 +4437,8 @@ return AttributeList::AT_TypeNonNull; case AttributedType::attr_nullable: return AttributeList::AT_TypeNullable; + case AttributedType::attr_nontemporal: + return AttributeList::AT_TypeNonTemporal; case AttributedType::attr_null_unspecified: return AttributeList::AT_TypeNullUnspecified; case AttributedType::attr_objc_kindof: @@ -6191,6 +6194,12 @@ } break; + case AttributeList::AT_TypeNonTemporal: + type = state.getSema().Context.getAttributedType( + AttributedType::attr_nontemporal, type, type); + attr.setUsedAsTypeAttr(); + break; + case AttributeList::AT_ObjCKindOf: // '__kindof' must be part of the decl-specifiers. 
switch (TAL) { Index: test/CodeGen/nontemporal.cpp =================================================================== --- /dev/null +++ test/CodeGen/nontemporal.cpp @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s +typedef float * PtrT; +typedef float * __attribute__((nontemporal)) NonTemporalPtrT; + +// CHECK-LABEL: @_Z3fooPfS_S_i +void foo(NonTemporalPtrT a, NonTemporalPtrT b, PtrT c, int N) { +// CHECK-DAG: [[VALUE_B:%.+]] = load float, float* %{{[0-9a-z._]+}}, align 4, !nontemporal ![[NT:[0-9]+]] +// CHECK-DAG: [[VALUE_C:%.+]] = load float, float* %{{[0-9a-z._]+}}, align 4{{$}} +// CHECK: [[VALUE_SUB:%.+]] = fsub float [[VALUE_B]], [[VALUE_C]] +// CHECK: store float [[VALUE_SUB]], float* %{{[0-9a-z._]+}}, align 4, !nontemporal ![[NT]] + a[N] = b[N] - c[N]; +} + +// CHECK-LABEL: @_Z4foo2PfS_S_ +void foo2(NonTemporalPtrT a, NonTemporalPtrT b, PtrT c) { +// CHECK-DAG: [[VALUE_B:%.+]] = load float, float* %{{[0-9]+}}, align 4, !nontemporal ![[NT:[0-9]+]] +// CHECK-DAG: [[VALUE_C:%.+]] = load float, float* %{{[0-9]+}}, align 4{{$}} +// CHECK: [[VALUE_SUB:%.+]] = fsub float [[VALUE_B]], [[VALUE_C]] +// CHECK: store float [[VALUE_SUB]], float* %{{[0-9]+}}, align 4, !nontemporal ![[NT]] + *a = *b - *c; +}