Index: include/clang/AST/Stmt.h
===================================================================
--- include/clang/AST/Stmt.h
+++ include/clang/AST/Stmt.h
@@ -1550,18 +1550,24 @@
   StringLiteral **Clobbers;
   IdentifierInfo **Names;
 
+  // Indicates whether this statement is inside a function that has the AVX
+  // target feature enabled (via attribute or invocation options).
+  bool SupportsAVX;
+
   friend class ASTStmtReader;
 
 public:
   GCCAsmStmt(const ASTContext &C, SourceLocation asmloc, bool issimple,
-             bool isvolatile, unsigned numoutputs, unsigned numinputs,
-             IdentifierInfo **names, StringLiteral **constraints, Expr **exprs,
-             StringLiteral *asmstr, unsigned numclobbers,
-             StringLiteral **clobbers, SourceLocation rparenloc);
+             bool isvolatile, bool supportsavx, unsigned numoutputs,
+             unsigned numinputs, IdentifierInfo **names,
+             StringLiteral **constraints, Expr **exprs, StringLiteral *asmstr,
+             unsigned numclobbers, StringLiteral **clobbers,
+             SourceLocation rparenloc);
 
   /// \brief Build an empty inline-assembly statement.
-  explicit GCCAsmStmt(EmptyShell Empty) : AsmStmt(GCCAsmStmtClass, Empty),
-    Constraints(nullptr), Clobbers(nullptr), Names(nullptr) { }
+  explicit GCCAsmStmt(EmptyShell Empty)
+      : AsmStmt(GCCAsmStmtClass, Empty), Constraints(nullptr),
+        Clobbers(nullptr), Names(nullptr), SupportsAVX(false) {}
 
   SourceLocation getRParenLoc() const { return RParenLoc; }
   void setRParenLoc(SourceLocation L) { RParenLoc = L; }
Index: lib/AST/Stmt.cpp
===================================================================
--- lib/AST/Stmt.cpp
+++ lib/AST/Stmt.cpp
@@ -632,6 +632,14 @@
       CurPtr = NameEnd+1;
       continue;
+    } else if (*Begin == 'v') {
+      // GCC accepts code starting with "%v", e.g. "%vpcmpestri", and turns it
+      // into the "vpcmpestri" instruction if the target processor supports
+      // AVX, and into "pcmpestri" otherwise.
+      if (SupportsAVX)
+        CurStringPiece = "v" + CurStringPiece;
+      CurStringPiece += EscapedChar;
+      continue;
     }
 
     DiagOffs = CurPtr-StrStart-1;
     return diag::err_asm_invalid_escape;
@@ -682,13 +690,15 @@
 //===----------------------------------------------------------------------===//
 
 GCCAsmStmt::GCCAsmStmt(const ASTContext &C, SourceLocation asmloc,
-                       bool issimple, bool isvolatile, unsigned numoutputs,
-                       unsigned numinputs, IdentifierInfo **names,
-                       StringLiteral **constraints, Expr **exprs,
-                       StringLiteral *asmstr, unsigned numclobbers,
-                       StringLiteral **clobbers, SourceLocation rparenloc)
-    : AsmStmt(GCCAsmStmtClass, asmloc, issimple, isvolatile, numoutputs,
-              numinputs, numclobbers), RParenLoc(rparenloc), AsmStr(asmstr) {
+                       bool issimple, bool isvolatile, bool supportsavx,
+                       unsigned numoutputs, unsigned numinputs,
+                       IdentifierInfo **names, StringLiteral **constraints,
+                       Expr **exprs, StringLiteral *asmstr,
+                       unsigned numclobbers, StringLiteral **clobbers,
+                       SourceLocation rparenloc)
+    : AsmStmt(GCCAsmStmtClass, asmloc, issimple, isvolatile, numoutputs,
+              numinputs, numclobbers),
+      RParenLoc(rparenloc), AsmStr(asmstr), SupportsAVX(supportsavx) {
   unsigned NumExprs = NumOutputs + NumInputs;
Index: lib/Sema/SemaStmtAsm.cpp
===================================================================
--- lib/Sema/SemaStmtAsm.cpp
+++ lib/Sema/SemaStmtAsm.cpp
@@ -138,6 +138,75 @@
   return false;
 }
 
+// Collect all target features specified for the current context by the
+// "target" attribute. Start from the current DeclContext and walk its
+// parents until we find a function context.
+static void CollectTargetFeatures(Sema &S,
+                                  std::vector<std::string> &TargetFeatures,
+                                  StringRef &TargetCPU) {
+  DeclContext *DC = S.CurContext;
+
+  auto Collect = [&TargetFeatures, &TargetCPU](const FunctionDecl *FD) {
+    if (!FD)
+      return;
+    if (const auto *TA = FD->getAttr<TargetAttr>()) {
+      TargetAttr::ParsedTargetAttr ParsedAttr = TA->parse();
+      TargetFeatures.insert(TargetFeatures.begin(), ParsedAttr.first.begin(),
+                            ParsedAttr.first.end());
+      if (TargetCPU == "")
+        TargetCPU = ParsedAttr.second;
+    }
+  };
+
+  // Climb up the tree of nested DeclContexts, collecting the target features.
+  while (true) {
+    if (isa<BlockDecl>(DC) || isa<EnumDecl>(DC) || isa<RecordDecl>(DC)) {
+      DC = DC->getParent();
+    } else if (const auto *MD = dyn_cast<CXXMethodDecl>(DC)) {
+      // Check if it's a lambda and collect its target features.
+      if (MD->getOverloadedOperator() == OO_Call &&
+          cast<CXXRecordDecl>(DC->getParent())->isLambda()) {
+        Collect(MD);
+      }
+      DC = DC->getParent()->getParent();
+    } else {
+      // It's the nearest enclosing function; collect its features and stop.
+      Collect(dyn_cast<FunctionDecl>(DC));
+      break;
+    }
+  }
+}
+
+// Check whether a particular target feature is enabled in the current context,
+// either by command-line options or by __attribute__((__target__("...")))
+// applied to any surrounding lambda or function.
+static bool TargetFeatureEnabled(Sema &S, StringRef Feature) {
+  const TargetInfo &TI = S.Context.getTargetInfo();
+  llvm::StringMap<bool> FeatureMap;
+  StringRef TargetCPU;
+  std::vector<std::string> TargetFeatures;
+
+  // Collect target features and CPU from surrounding declarations.
+  CollectTargetFeatures(S, TargetFeatures, TargetCPU);
+
+  // Don't forget to add command-line features and CPU (lowest priority).
+  TargetFeatures.insert(TargetFeatures.begin(),
+                        TI.getTargetOpts().FeaturesAsWritten.begin(),
+                        TI.getTargetOpts().FeaturesAsWritten.end());
+  if (TargetCPU == "")
+    TargetCPU = TI.getTargetOpts().CPU;
+
+  // Process the feature vector.
+  TI.initFeatureMap(FeatureMap, S.getDiagnostics(), TargetCPU, TargetFeatures);
+
+  // See if the requested feature is present and enabled.
+  for (const auto &F : FeatureMap)
+    if (F.getValue() && F.getKey().equals(Feature))
+      return true;
+
+  return false;
+}
+
 StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
                                  bool IsVolatile, unsigned NumOutputs,
                                  unsigned NumInputs, IdentifierInfo **Names,
@@ -150,6 +219,12 @@
   StringLiteral *AsmString = cast<StringLiteral>(asmString);
   StringLiteral **Clobbers = reinterpret_cast<StringLiteral**>(clobbers.data());
 
+  // Determine whether the current asm statement is inside a function with the
+  // "avx" target feature enabled. Use this information to resolve GCC-style
+  // "%v" prefixes, transforming "%vfoo" into "vfoo" when AVX is enabled and
+  // into "foo" otherwise.
+  bool SupportsAVX = TargetFeatureEnabled(*this, "avx");
+
   SmallVector<TargetInfo::ConstraintInfo, 4> OutputConstraintInfos;
 
   // The parser verifies that there is a string literal here.
@@ -158,9 +233,10 @@
   // If we're compiling CUDA file and function attributes indicate that it's not
   // for this compilation side, skip all the checks.
   if (!DeclAttrsMatchCUDAMode(getLangOpts(), getCurFunctionDecl())) {
-    GCCAsmStmt *NS = new (Context) GCCAsmStmt(
-        Context, AsmLoc, IsSimple, IsVolatile, NumOutputs, NumInputs, Names,
-        Constraints, Exprs.data(), AsmString, NumClobbers, Clobbers, RParenLoc);
+    GCCAsmStmt *NS = new (Context)
+        GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, SupportsAVX,
+                   NumOutputs, NumInputs, Names, Constraints, Exprs.data(),
+                   AsmString, NumClobbers, Clobbers, RParenLoc);
     return NS;
   }
 
@@ -344,10 +420,10 @@
                        diag::err_asm_unknown_register_name) << Clobber);
   }
 
-  GCCAsmStmt *NS =
-    new (Context) GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
-                             NumInputs, Names, Constraints, Exprs.data(),
-                             AsmString, NumClobbers, Clobbers, RParenLoc);
+  GCCAsmStmt *NS = new (Context)
+      GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, SupportsAVX, NumOutputs,
+                 NumInputs, Names, Constraints, Exprs.data(), AsmString,
+                 NumClobbers, Clobbers, RParenLoc);
   // Validate the asm string, ensuring it makes sense given the operands we
   // have.
   SmallVector<GCCAsmStmt::AsmStringPiece, 8> Pieces;
Index: test/CodeGen/avx-v-modifier-inline-asm.cpp
===================================================================
--- test/CodeGen/avx-v-modifier-inline-asm.cpp
+++ test/CodeGen/avx-v-modifier-inline-asm.cpp
@@ -0,0 +1,86 @@
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux -target-feature +avx -emit-llvm -S %s -o - | FileCheck %s -check-prefix=CHECK-AVX
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux -emit-llvm -S %s -o - | FileCheck %s
+
+void f1(void *arg) {
+  __asm__ ("%vpcmpestri $0, (%1), %2" : "=c"(arg) : "r"(arg), "x"(arg));
+  // CHECK-LABEL: f1
+  // CHECK: pcmpestri
+  // CHECK-AVX: vpcmpestri
+}
+
+// Regardless of the compiler's invocation options, we should get "vpcmpestri".
+void f2(void *arg) __attribute__((__target__("avx")));
+void f2(void *arg) {
+  __asm__ ("%vpcmpestri $0, (%1), %2" : "=c"(arg) : "r"(arg), "x"(arg));
+  // CHECK-LABEL: f2
+  // CHECK: vpcmpestri
+  // CHECK-AVX: vpcmpestri
+}
+
+// avx2 implies avx, so always expect "vpcmpestri".
+void f3(void *arg) __attribute__((__target__("avx2")));
+void f3(void *arg) {
+  __asm__ ("%vpcmpestri $0, (%1), %2" : "=c"(arg) : "r"(arg), "x"(arg));
+  // CHECK-LABEL: f3
+  // CHECK: vpcmpestri
+  // CHECK-AVX: vpcmpestri
+}
+
+// Here we explicitly disable avx, so always expect "pcmpestri".
+void f4(void *arg) __attribute__((__target__("no-avx")));
+void f4(void *arg) {
+  __asm__ ("%vpcmpestri $0, (%1), %2" : "=c"(arg) : "r"(arg), "x"(arg));
+  // CHECK-LABEL: f4
+  // CHECK: pcmpestri
+  // CHECK-AVX: pcmpestri
+}
+
+// Check that enabling features other than avx does not affect the expected logic.
+void f5(void *arg) __attribute__((__target__("mmx"))) __attribute__((__target__("sse4.1")));
+void f5(void *arg) {
+  __asm__ ("%vpcmpestri $0, (%1), %2" : "=c"(arg) : "r"(arg), "x"(arg));
+  // CHECK-LABEL: f5
+  // CHECK: pcmpestri
+  // CHECK-AVX: vpcmpestri
+}
+
+void f6() {
+  int a = 10, b;
+  __asm__ ("%vmovq (%%rbp), %%xmm0" : "=r"(b) : "r"(a) : "%eax");
+  // CHECK-LABEL: f6
+  // CHECK: movq
+  // CHECK-AVX: vmovq
+}
+
+// A lambda's attributes also count.
+void f7(void *x) {
+  auto f = [](void *arg) __attribute__((__target__("avx"))) {
+    __asm__("%vpcmpestri $0, (%1), %2" : "=c"(arg) : "r"(arg), "x"(arg));
+  };
+
+  // CHECK-LABEL: f7
+  // CHECK: vpcmpestri
+  // CHECK-AVX: vpcmpestri
+
+  f(x);
+}
+
+// Check the influence of attributes in nested scopes: f8 disables avx, then g1
+// re-enables it (via avx2), so we always expect "vmovq".
+void f8(void *x) __attribute__((__target__("no-avx")));
+void f8(void *x) {
+  auto g1 = [](void *arg) __attribute__((__target__("avx2"))) {
+    auto g2 = [&arg]() __attribute__((__target__("no-mmx"))) {
+      int a = 10, b;
+      __asm__ ("%vmovq (%%rbp), %%xmm0" : "=r"(b) : "r"(a) : "%eax");
+
+      // CHECK-LABEL: f8
+      // CHECK: vmovq
+      // CHECK-AVX: vmovq
+    };
+
+    g2();
+  };
+
+  g1(x);
+}
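
Note (illustration only, not part of the patch): the FileCheck lines above drive %clang_cc1 directly; assuming this patch is applied and an x86-64 target, the same "%v" resolution can be observed through the driver. The file name t.c below is hypothetical.

    /* t.c -- hypothetical example; "%v" becomes "v" only when AVX is enabled. */
    void f(void *arg) {
      __asm__("%vpcmpestri $0, (%1), %2" : "=c"(arg) : "r"(arg), "x"(arg));
    }

    $ clang -S -emit-llvm -mavx t.c -o - | grep pcmpestri    # expected: "vpcmpestri"
    $ clang -S -emit-llvm t.c -o - | grep pcmpestri          # expected: "pcmpestri"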