Index: cfe/trunk/lib/Format/Format.cpp =================================================================== --- cfe/trunk/lib/Format/Format.cpp +++ cfe/trunk/lib/Format/Format.cpp @@ -32,6 +32,7 @@ #include "clang/Basic/VirtualFileSystem.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Path.h" @@ -40,6 +41,7 @@ #include #include #include +#include #define DEBUG_TYPE "format-formatter" @@ -48,6 +50,16 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory) LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::RawStringFormat) +namespace std { +// Allow using StringRef in std::unordered_set. +template <> struct hash { +public: + size_t operator()(const llvm::StringRef &s) const { + return llvm::hash_value(s); + } +}; +} // namespace std + namespace llvm { namespace yaml { template <> struct ScalarEnumerationTraits { @@ -1400,6 +1412,101 @@ std::set DeletedTokens; }; +class ObjCHeaderStyleGuesser : public TokenAnalyzer { +public: + ObjCHeaderStyleGuesser(const Environment &Env, const FormatStyle &Style) + : TokenAnalyzer(Env, Style), IsObjC(false) {} + + std::pair + analyze(TokenAnnotator &Annotator, + SmallVectorImpl &AnnotatedLines, + FormatTokenLexer &Tokens) override { + assert(Style.Language == FormatStyle::LK_Cpp); + IsObjC = guessIsObjC(AnnotatedLines, Tokens.getKeywords()); + tooling::Replacements Result; + return {Result, 0}; + } + + bool isObjC() { return IsObjC; } + +private: + static bool guessIsObjC(const SmallVectorImpl &AnnotatedLines, + const AdditionalKeywords &Keywords) { + static const std::unordered_set FoundationIdentifiers = { + "CGFloat", + "NSAffineTransform", + "NSArray", + "NSAttributedString", + "NSCache", + "NSCharacterSet", + "NSCountedSet", + "NSData", + "NSDataDetector", + "NSDecimal", + "NSDecimalNumber", + "NSDictionary", + "NSEdgeInsets", + "NSHashTable", + "NSIndexPath", + "NSIndexSet", + "NSInteger", + "NSLocale", + "NSMapTable", + "NSMutableArray", + "NSMutableAttributedString", + "NSMutableCharacterSet", + "NSMutableData", + "NSMutableDictionary", + "NSMutableIndexSet", + "NSMutableOrderedSet", + "NSMutableSet", + "NSMutableString", + "NSNumber", + "NSNumberFormatter", + "NSOrderedSet", + "NSPoint", + "NSPointerArray", + "NSRange", + "NSRect", + "NSRegularExpression", + "NSSet", + "NSSize", + "NSString", + "NSUInteger", + "NSURL", + "NSURLComponents", + "NSURLQueryItem", + "NSUUID", + }; + + for (auto &Line : AnnotatedLines) { + for (FormatToken *FormatTok = Line->First->Next; FormatTok; + FormatTok = FormatTok->Next) { + if ((FormatTok->Previous->is(tok::at) && + (FormatTok->isObjCAtKeyword(tok::objc_interface) || + FormatTok->isObjCAtKeyword(tok::objc_implementation) || + FormatTok->isObjCAtKeyword(tok::objc_protocol) || + FormatTok->isObjCAtKeyword(tok::objc_end) || + FormatTok->isOneOf(tok::numeric_constant, tok::l_square, + tok::l_brace))) || + (FormatTok->Tok.isAnyIdentifier() && + FoundationIdentifiers.find(FormatTok->TokenText) != + FoundationIdentifiers.end()) || + FormatTok->is(TT_ObjCStringLiteral) || + FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, + TT_ObjCBlockLBrace, TT_ObjCBlockLParen, + TT_ObjCDecl, TT_ObjCForIn, TT_ObjCMethodExpr, + TT_ObjCMethodSpecifier, TT_ObjCProperty)) { + return true; + } + } + } + return false; + } + + bool IsObjC; +}; + struct IncludeDirective { StringRef Filename; StringRef Text; @@ -2185,14 +2292,15 @@ FormatStyle Style = getLLVMStyle(); Style.Language = getLanguageByFileName(FileName); - // This is a very crude detection of whether a header contains ObjC code that - // should be improved over time and probably be done on tokens, not one the - // bare content of the file. - if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h") && - (Code.contains("\n- (") || Code.contains("\n+ (") || - Code.contains("\n@end\n") || Code.contains("\n@end ") || - Code.endswith("@end"))) - Style.Language = FormatStyle::LK_ObjC; + if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h")) { + std::unique_ptr Env = + Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{}); + ObjCHeaderStyleGuesser Guesser(*Env, Style); + Guesser.process(); + if (Guesser.isObjC()) { + Style.Language = FormatStyle::LK_ObjC; + } + } FormatStyle FallbackStyle = getNoStyle(); if (!getPredefinedStyle(FallbackStyleName, Style.Language, &FallbackStyle)) Index: cfe/trunk/unittests/Format/FormatTestObjC.cpp =================================================================== --- cfe/trunk/unittests/Format/FormatTestObjC.cpp +++ cfe/trunk/unittests/Format/FormatTestObjC.cpp @@ -94,6 +94,66 @@ Style = getStyle("LLVM", "a.h", "none", "void f() {}"); ASSERT_TRUE((bool)Style); EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language); + + Style = getStyle("{}", "a.h", "none", "@interface Foo\n@end\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); + + Style = getStyle("{}", "a.h", "none", + "const int interface = 1;\nconst int end = 2;\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language); + + Style = getStyle("{}", "a.h", "none", "@protocol Foo\n@end\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); + + Style = getStyle("{}", "a.h", "none", + "const int protocol = 1;\nconst int end = 2;\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language); + + Style = getStyle("{}", "a.h", "none", "extern NSString *kFoo;\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); + + Style = + getStyle("{}", "a.h", "none", "typedef NS_ENUM(NSInteger, Foo) {};\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); + + Style = getStyle("{}", "a.h", "none", "enum Foo {};"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language); + + Style = getStyle("{}", "a.h", "none", "extern NSInteger Foo();\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); + + Style = + getStyle("{}", "a.h", "none", "inline void Foo() { Log(@\"Foo\"); }\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); + + Style = + getStyle("{}", "a.h", "none", "inline void Foo() { Log(\"Foo\"); }\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language); + + Style = + getStyle("{}", "a.h", "none", "inline void Foo() { id = @[1, 2, 3]; }\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); + + Style = getStyle("{}", "a.h", "none", + "inline void Foo() { id foo = @{1: 2, 3: 4, 5: 6}; }\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); + + Style = getStyle("{}", "a.h", "none", + "inline void Foo() { int foo[] = {1, 2, 3}; }\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language); } TEST_F(FormatTestObjC, FormatObjCTryCatch) {